Ejemplo n.º 1
0
def plot_uncorrected_phylogeny(tree, species, latin_names, species_history):
    """
    Render the input tree to a figure file, drawn without branch-length labels.

    The figure is written under ``rate_adjustment/<species>/`` with the file
    name produced by the ``_TREE`` template.

    :param tree: input tree from configuration file
    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each informal species name to its latin name
    :param species_history: the list of ancestor nodes of the focal species, including the focal species and going up to the root
        (not used by this function)
    """
    label_leaves_with_latin_names(tree, latin_names)
    node_and_branch_style(tree)

    style = TreeStyle()
    style.orientation = 1
    style.scale = 200
    style.branch_vertical_margin = 14
    # The leaf name is shown through a Face, so the built-in label is disabled
    style.show_leaf_name = False
    style.show_branch_length = False
    style.show_scale = False
    # Same margin on all four sides of the figure
    for side in ("left", "right", "top", "bottom"):
        setattr(style, f"margin_{side}", 25)

    figure_path = os.path.join("rate_adjustment", f"{species}",
                               _TREE.format(species))
    tree.render(figure_path, w=4.5, units="in", tree_style=style)
Ejemplo n.º 2
0
def generate_type_tree_figure(output_file):
    """Render ``TYPES_STRING_TREE`` to ``output_file`` with a custom ETE style.

    ETE (``ete3``) must be installed; if it cannot be imported, a warning is
    logged and nothing is rendered.
    See http://etetoolkit.org/new_download/ or use anaconda.

    :param output_file: str, path of the image to write
    """
    try:
        from ete3 import faces, TextFace, TreeStyle
    except ImportError as e:
        logger.warning(
            'ImportError : %s Generation of type_tree figure need ETE dependencies to '
            'be installed Use from anaconda, or look at installation procedure on '
            'http://etetoolkit.org/new_download/', e)
        return

    def _label_node(node):
        # Draw every node name as a bold Courier label to the right of its branch
        name_face = TextFace(node.name, fsize=16, ftype='Courier', bold=True)
        faces.add_face_to_node(name_face, node, column=10,
                               position="branch-right")

    # Custom tree style: no default leaf names or scale bar, since the
    # layout function above draws the labels itself
    tree_style = TreeStyle()
    tree_style.orientation = 1
    tree_style.branch_vertical_margin = 20
    tree_style.show_scale = False
    tree_style.show_leaf_name = False
    tree_style.layout_fn = _label_node

    TYPES_STRING_TREE.render(output_file, tree_style=tree_style)
Ejemplo n.º 3
0
    def __init__(self, root_text):
        """Create an empty tree with a styled, labelled root.

        :param root_text: text displayed next to the root node
        """
        # Build the tree structure and the style used to draw it
        root = Tree()

        layout = TreeStyle()
        layout.scale = 100
        layout.branch_vertical_margin = 30
        layout.orientation = 0
        layout.rotation = 0
        layout.show_scale = False
        layout.show_leaf_name = True

        self.style = layout
        self.tree = root

        # Label and style the root of the tree
        root_label = TextFace(root_text, fsize=10, fgcolor='darkred')
        self.tree.add_face(root_label, column=0, position='branch-right')
        root_style = NodeStyle(size=20, hz_line_width=2, fgcolor='blue')
        self.tree.set_style(root_style)

        # The root starts out as the current node
        self.curr_node = self.tree
Ejemplo n.º 4
0
rows, columns = treeStruct.shape

# The current node starts at the (empty) root
node_cur = root = Tree()

'''#######################
 Tree Style Begin
'''
ts = TreeStyle()
ts.mode = 'r'
ts.scale = 50
ts.rotation = 270
ts.orientation = 1  # left or right
ts.show_branch_length = True
ts.show_leaf_name = False
ts.title.add_face(TextFace("Tree example", fsize=8), column=0)
'''
 Tree Style End
#######################'''


'''#######################
 Node Style Begin
'''
Ejemplo n.º 5
0
# Script: parse a gene tree, a species tree and a gene-to-species mapping
# given on the command line, print the parsed trees, then display them and
# write them out as SVG files.

# Clear the terminal by running the shell `clear` command
os.system("clear")
filegene = sys.argv[1]          # gene tree file
filespecies = sys.argv[2]       # species tree file
filemap = sys.argv[3]           # gene-to-species mapping file

# Build the tree structures from the files
dictmap = newickparser.mapParser(filemap)
rootgene = node.node(name ="root", gen = 0, enfants = [])
rootspecies = node.node(name = "root", gen =0, enfants = [])
arbrespecies = newickparser.lectureArbre(filespecies)
# NOTE(review): 1 looks like an error sentinel returned by lectureArbre — confirm
if arbrespecies != 1 :
    newickparser.parser(None, rootspecies, 0, arbrespecies.strip())
arbregene = newickparser.lectureArbre(filegene)
if arbregene != 1 :
    newickparser.parser(None, rootgene, 0, arbregene.strip())
newickparser.gene_to_species(rootgene, dictmap)

print (rootgene,"\n")
print (rootspecies)

# Graphical display of the trees and generation of the SVG files
# NOTE(review): Tree(...) is built from the raw lectureArbre results even when
# they equal 1 (the value the checks above guard against) — confirm intended
genetree = Tree(arbregene)
spectree = Tree(arbrespecies)
ts = TreeStyle()
ts.mode = "r"
ts.orientation = 1
# NOTE(review): `ts` is only passed to spectree.show(); the gene tree display
# and both render() calls use no explicit tree style — confirm intended
genetree.show()
spectree.show(tree_style=ts)
genetree.render("genetree.svg")
spectree.render("spectree.svg")
Ejemplo n.º 6
0
def _flag_unknown_branch_length(node):
    """Mark the branch of ``node`` as having an unknown length (placeholder distance, label and style)."""
    node.dist = 10  # impossible number to flag an unknown length
    draw_branch_length_label(node, known_distance=False)
    unknown_branch_len_style(node)


def _set_leaf_branch_length(leaf, rate_sister_dict):
    """Set the branch length of ``leaf`` from ``rate_sister_dict``, or flag it as unknown if absent."""
    if leaf.name in rate_sister_dict:  # the leaf has known length
        leaf.dist = rate_sister_dict[leaf.name]
        draw_branch_length_label(leaf, known_distance=True)
    else:  # the leaf has unknown length
        _flag_unknown_branch_length(leaf)


def plotting_tree(species, latin_names, original_tree, correction_table,
                  consensus_strategy_for_multi_outgroups, ortholog_db,
                  peak_stats, nextflow_flag):
    """
    Generate a figure of the input tree with branch lengths equal to Ks distances.

    The figure is written under ``rate_adjustment/<species>/`` with the file name
    produced by the ``_TREE_BRANCH_DISTANCES`` template.
    If it is not possible to compute the branch length for a branch, the branch line is dashed.
    This happens when some ortholog data to compute the branch-specific Ks contribution are missing.

    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each informal species name to its latin name
    :param original_tree: Newick tree format of the phylogenetic tree among the involved species
    :param correction_table: adjustment results in DataFrame format (contains both possible types of consensus strategy for how to deal with multiple outgroups)
    :param consensus_strategy_for_multi_outgroups: user choice about which consensus strategy to use when dealing with multiple outgroups
    :param ortholog_db: ortholog peak database used to get ortholog data for the relative rate test; if empty, it is ignored
    :param peak_stats: flag to specify whether the ortholog distribution peak is the mode or the median
    :param nextflow_flag: boolean flag to state whether the script is run in the Nextflow pipeline or not
    """
    # Get an equivalent tree where the focal species is the top leaf
    tree = reorder_tree_leaves(original_tree, species)
    node_and_branch_style(tree)

    species_node = get_species_node(species, tree)

    labeling_internal_nodes(species_node)
    species_history = get_species_history(species_node)
    rate_species_dict, rate_sister_dict = {}, {}

    for ancestor_node in species_history[:-2]:
        # NOTE: at the moment the following function is only used to fill in the dictionaries of branch-specific Ks contributions
        # (the returned margin error box is not needed here)
        average_peak_of_divergence_event, _, error_text = get_branch_length_and_errorbox(
            species, ancestor_node, correction_table,
            consensus_strategy_for_multi_outgroups, latin_names,
            rate_species_dict, rate_sister_dict)

        # Adding the branch length to the focal species node, otherwise it lacks it
        if ancestor_node.name == species:
            ancestor_node.dist = rate_species_dict[species]
            draw_branch_length_label(ancestor_node, known_distance=True)

        # Storing as node features both the divergent Ks of the node (as mean) and the error range (left-most and right-most boundaries)
        divergence_node = ancestor_node.up  # getting parent node, where the current divergence takes place
        divergence_node.add_feature("rate_species", rate_species_dict[species])
        divergence_node.add_feature("avg_peak",
                                    round(average_peak_of_divergence_event, 2))
        divergence_node.add_feature("margins",
                                    f"({error_text[0]}, {error_text[1]})")

    # Setting the branch length of the nodes belonging to the speciation history of the focal species
    for divergence_node in species_history[1:]:
        parent_node = divergence_node.up
        try:
            divergence_node.dist = round(
                parent_node.rate_species - divergence_node.rate_species, 3)
            draw_branch_length_label(divergence_node, known_distance=True)
        except Exception:
            # NOTE(review): presumably raised when this node or its parent lacks the
            # "rate_species" feature; the broad catch is kept to preserve behavior
            _flag_unknown_branch_length(divergence_node)

    # Branch-specific Ks contributions are always obtained from the adjustment table;
    # if the ortholog DB is available, they can additionally be computed from there
    logging.info(
        "Getting branch-specific Ks contributions from rate-adjustment table data"
    )
    if not ortholog_db.empty:
        logging.info(
            "Computing branch-specific Ks contributions from ortholog peak data in database by applying principles of the relative rate test"
        )

    rate_dict = {}
    get_rates_from_current_analysis(rate_dict, correction_table, species,
                                    species_history, latin_names)

    # Setting the branch length of the other remaining nodes
    missing_ortholog_data_from_database = False
    missing_ortholog_data_from_correction_table = False

    for node in species_history[:-1]:
        sisters = node.get_sisters()  # is a list containing the sister NODE (it's only ONE node)
        sister = sisters[0]
        sister_is_clade = len(sister.get_leaves()) > 1

        if not ortholog_db.empty:  # there is an ortholog database that can help with computing the missing branch lengths
            if sister_is_clade:
                missing_ortholog_data_from_database = get_rates_from_ortholog_peak_db(
                    rate_dict, sisters, latin_names, ortholog_db,
                    peak_stats, missing_ortholog_data_from_database)
            else:
                _set_leaf_branch_length(sister, rate_sister_dict)
        elif sister_is_clade:  # ortholog database not available (it was previously set as an empty dataframe)
            missing_ortholog_data_from_correction_table = True  # correction_tables is not enough to know all branch lengths!
            _flag_unknown_branch_length(sister)
            for descendant in sister.get_descendants():
                _flag_unknown_branch_length(descendant)
        else:
            _set_leaf_branch_length(sister.get_leaves()[0],
                                    rate_sister_dict)  # there is only one leaf

    # If the ortholog peak database is lacking some required data (must have been deleted by the user) or
    # if the peak database has been deleted and only the correction_table has been used for the branch contributions, gives a warning
    missing_any_data = (missing_ortholog_data_from_database
                        or missing_ortholog_data_from_correction_table)
    if missing_any_data:
        logging.warning("")
        logging.warning(
            "One or more branch lengths are unknown (dashed line) due to missing ortholog distribution peak data"
        )

    if nextflow_flag:
        # In Nextflow mode, tell the user to wait until the pipeline is finished in order to have all branch lengths
        if missing_ortholog_data_from_database:
            logging.info(
                "As soon as new ortholog data will become available, the tree branch lengths will be updated"
            )
    elif missing_any_data:
        # In manual mode, tell the user how to get a complete branch tree (probably they deleted some data in the peak database)
        logging.warning(
            "It's necessary to run a new Nextflow (or manual) pipeline to complete the tree branch length information"
        )

    label_leaves_with_latin_names(tree, latin_names)
    adapt_unknown_branch_length(tree)

    ts = TreeStyle()
    ts.orientation = 1
    ts.branch_vertical_margin = 14
    ts.show_leaf_name = False  # because there is a Face showing it
    ts.show_branch_length = False
    ts.margin_left = 25
    ts.margin_right = 25
    ts.margin_top = 25
    ts.scale = 200
    # The last node of the species history is rendered as the root of the figure
    root_of_corrected_tree = species_history[-1]
    root_of_corrected_tree.render(os.path.join(
        "rate_adjustment", f"{species}",
        _TREE_BRANCH_DISTANCES.format(species)),
                                  w=4.5,
                                  units="in",
                                  tree_style=ts)
Ejemplo n.º 7
0
            node_style["size"] = 100
            node_style["fgcolor"] = "#66000000"

    node.set_style(node_style)


# One PDF per (output name, drawing mode, layout function) combination
plot_variants = [
    ("tree_symbols", "c", botstrap_lower_right),
    ("tree_symbols", "r", botstrap_lower_right),
    ("tree_arrows", "r", botstrap_symbols),
]

for output_string, mode, layout_fn in plot_variants:
    stars_style = TreeStyle()
    stars_style.layout_fn = layout_fn
    stars_style.orientation = tree_orientation
    stars_style.mode = mode
    stars_style.legend_position = 2

    # Legend: a coloured square followed by the subtype name in the same colour
    for subtype in subtype_color_dict:
        subtype_color = subtype_color_dict[subtype]
        color_box = RectFace(10, 10, subtype_color, subtype_color)
        stars_style.legend.add_face(color_box, column=0)
        subtype_label = TextFace(subtype, fgcolor=subtype_color, bold=True)
        stars_style.legend.add_face(subtype_label, column=1)

    pdf_path = "plots/{}_{}_{}.pdf".format(plot_prefix, output_string, mode)
    ref_tree.render(file_name=pdf_path, tree_style=stars_style)

print("Done.")