예제 #1
0
import ete3


### Example tree exported from phyphy, with .extract_absrel_tree(), with the feature `Selected` ###
mytree =  "(0564_7:0.00708844[&&NHX:Selected=0],(((((0564_11:0.00527268[&&NHX:Selected=0],0564_4:0.00714182[&&NHX:Selected=0])Node20:0.0022574[&&NHX:Selected=0],(0564_1:0.00583239[&&NHX:Selected=0],(0564_21:0.00121537[&&NHX:Selected=0],0564_5:0.00266921[&&NHX:Selected=0])Node25:0.000797211[&&NHX:Selected=0])Node23:0.00142056[&&NHX:Selected=0])Node19:0.0019147[&&NHX:Selected=0],0564_17:0.00605582[&&NHX:Selected=0])Node18:0.00100178[&&NHX:Selected=0],((0564_13:0.0053066[&&NHX:Selected=0],(0564_15:0.00346989[&&NHX:Selected=0])Node32:0.000752206[&&NHX:Selected=0])Node30:0.00188243[&&NHX:Selected=0],((0564_22:0.00686981[&&NHX:Selected=0],0564_6:0.00581523[&&NHX:Selected=0])Node36:0.00125905[&&NHX:Selected=0],0564_3:0.00791919[&&NHX:Selected=1])Node35:0.0174886[&&NHX:Selected=1])Node29:0.0010489[&&NHX:Selected=0])Node17:0.00156911[&&NHX:Selected=0],0564_9:0.00551506[&&NHX:Selected=0])Node16:0.000783733[&&NHX:Selected=0],(((0557_24:0.00078793[&&NHX:Selected=0],0557_4:0.000787896[&&NHX:Selected=0],0557_2:0.000399166[&&NHX:Selected=0])Node9:0.00206483[&&NHX:Selected=0],0557_12:0.00267531[&&NHX:Selected=0])Node8:0.00118205[&&NHX:Selected=0],((0557_21:0[&&NHX:Selected=0],0557_6:0.000391941[&&NHX:Selected=0],0557_9:0.000402021[&&NHX:Selected=0],0557_11:0.00156985[&&NHX:Selected=0],0557_13:0.000401742[&&NHX:Selected=0],0557_26:0.00079377[&&NHX:Selected=0],(0557_5:0.00117641[&&NHX:Selected=0],0557_7:0[&&NHX:Selected=0])Node53:0.000391973[&&NHX:Selected=0])Node6:0.00118062[&&NHX:Selected=0],0557_25:0.00220372[&&NHX:Selected=0])Node7:0.00103489[&&NHX:Selected=0])Separator:0.00822051[&&NHX:Selected=1])[&&NHX:Selected=0];"

### Parse the Newick string with ete3, passing format=1
t = ete3.Tree(mytree, format=1)

## Tree-wide style: display leaf names as well as branch lengths
ts = ete3.TreeStyle()
ts.show_branch_length = True
ts.show_leaf_name = True

## One node style per value of the `Selected` feature:
## "1" (selected) -> red branch lines, "0" (not selected) -> black branch lines.
branch_styles = {}
for flag, colour in (("1", "red"), ("0", "black")):
    branch_style = ete3.NodeStyle()
    branch_style["vt_line_color"] = colour
    branch_style["hz_line_color"] = colour
    branch_styles[flag] = branch_style

style_selected = branch_styles["1"]
style_notselected = branch_styles["0"]

### Walk the tree and attach the matching style to every node.
### Features parsed from the tree string are strings, hence the "1"/"0" keys;
### a node whose `.Selected` is anything else keeps its default style.
for node in t.traverse("preorder"):
    chosen = branch_styles.get(node.Selected)
    if chosen is not None:
        node.set_style(chosen)
예제 #2
0
파일: phylogeny.py 프로젝트: menis/abutils
def _make_tree_figure(
        tree,
        fig,
        colors,
        orders,
        root_name,
        scale=None,
        branch_vert_margin=None,
        fontsize=12,
        show_names=True,
        name_field='seq_id',
        rename_function=None,
        color_node_labels=False,
        label_colors=None,
        tree_orientation=0,
        min_order_fraction=0.1,
        show_root_name=False,
        chain=None,
        # linked_alignment=None, alignment_fontsize=11, alignment_height=50, alignment_width=50,
        compact_alignment=False,
        scale_factor=1,
        linewidth=1,
        show_scale=False,
        ladderize=True,
        delete_nodes=None):
    """Style an ete3 tree and render it to ``fig``.

    Args:
        tree: Anything accepted by ``ete3.Tree`` (passed through unchanged).
        fig: Output target handed to ``Tree.render``.
        colors: Mapping used to pick branch colours. When ``orders`` is given
            it is indexed by order value; otherwise it is looked up by node
            name with ``'#000000'`` as the fallback.
        orders: Optional mapping of leaf name -> order value. For each node
            the smallest order whose share of the node's leaves is at least
            ``min_order_fraction`` decides the colour.
        root_name: Name of the node to use as outgroup, or ``None``.
        scale: Optional value for ``TreeStyle.scale`` (converted with ``int``).
        branch_vert_margin: Optional ``TreeStyle.branch_vertical_margin``.
        fontsize: Font size forwarded to ``_build_node_text_face``.
        show_names: ``True`` to label every node, a falsy value to label
            none, or a name / collection of names to label only those nodes.
        name_field, chain, compact_alignment, scale_factor: Accepted for
            interface compatibility; not used by this function.
        rename_function, color_node_labels, label_colors: Forwarded to
            ``_build_node_text_face``.
        tree_orientation: Value for ``TreeStyle.orientation``.
        min_order_fraction: See ``orders``.
        show_root_name: When ``True`` the root node's name is labelled in
            addition to the names selected by ``show_names``.
        linewidth: Width applied to horizontal and vertical branch lines.
        show_scale: Value for ``TreeStyle.show_scale``.
        ladderize: Ladderize the tree before rendering when truthy.
        delete_nodes: A node name or list of node names to delete.
    """
    if delete_nodes is None:
        delete_nodes = []
    elif type(delete_nodes) in STR_TYPES:
        delete_nodes = [
            delete_nodes,
        ]

    # Normalise ``show_names`` so the per-node test below never has to run
    # ``in`` against a bool, and so the caller's list is never mutated.
    show_all_names = show_names is True
    if show_all_names or not show_names:
        names_to_show = []
    elif type(show_names) in STR_TYPES:
        names_to_show = [show_names]
    else:
        names_to_show = list(show_names)
    if show_root_name is True and root_name is not None:
        names_to_show.append(root_name)

    # The tree is always built here. Previously this statement sat in an
    # ``else`` that had become attached to the ``show_root_name`` test, so
    # ``t`` was undefined whenever ``show_root_name`` was True.
    t = ete3.Tree(tree)
    if root_name is not None:
        t.set_outgroup(t & root_name)

    # style the nodes (iterate over a snapshot because nodes may be deleted)
    for node in list(t.traverse()):
        if node.name in delete_nodes:
            node.delete()
            continue

        # Default to black so ``color`` is always bound, even when no order
        # reaches ``min_order_fraction`` for this node.
        color = '#000000'
        if orders is not None:
            leaves = node.get_leaf_names()
            order_count = Counter(orders[l] for l in leaves)
            for order in sorted(order_count):
                if float(order_count[order]) / len(
                        leaves) >= min_order_fraction:
                    color = colors[order]
                    break
        else:
            color = colors.get(node.name, '#000000')

        style = ete3.NodeStyle()
        style['size'] = 0
        style['vt_line_width'] = float(linewidth)
        style['hz_line_width'] = float(linewidth)
        style['vt_line_color'] = color
        style['hz_line_color'] = color
        style['vt_line_type'] = 0
        style['hz_line_type'] = 0
        if show_all_names or node.name in names_to_show:
            tf = _build_node_text_face(node, color_node_labels, color,
                                       label_colors, fontsize, rename_function)
            node.add_face(tf, column=0)
        node.set_style(style)

    t.dist = 0
    ts = ete3.TreeStyle()
    ts.orientation = tree_orientation
    # Names are drawn through the text faces added above.
    ts.show_leaf_name = False
    if scale is not None:
        ts.scale = int(scale)
    if branch_vert_margin is not None:
        ts.branch_vertical_margin = float(branch_vert_margin)
    ts.show_scale = show_scale
    if ladderize:
        t.ladderize()
    t.render(fig, tree_style=ts)
예제 #3
0
        '#ffee33', '#e9debb', '#ffcdf3', '#ffffff'
    ]
    pal2 = ['#a0a0a0', '#e9debb'] * 4 + ['#a0a0a0'
                                         ] + ['#a0a0a0', '#e9debb'] * 4
    pal2 = ['#a0a0a0', '#e9debb'] * 8
    bgcol = iter(pal16[2:])
    bgcol = iter(pal2)

    nodestyles = {}
    for node in tree.iter_leaves():
        binom, spp, common = mapname(node.name)
        if (binom, spp) in nodestyles:
            nodestyles[(binom, spp)][1].append(node)
            continue
        col = next(bgcol)
        s = ete3.NodeStyle()
        s["bgcolor"] = col
        #s["hz_line_color"] = s["vt_line_color"] = col
        #s["hz_line_width"] = s["vt_line_width"] = 1
        nodestyles[(binom, spp)] = (s, [node])

    import copy

    labelnodes = {}
    for (binom, spp), (ns, nodelist) in nodestyles.iteritems():
        if len(nodelist) == 1:
            anc = nodelist[0]
        else:
            anc = tree.get_common_ancestor(*nodelist)
        #labelnodes[nodelist[len(nodelist)/2]] = mapname(nodelist[0].name)
        labelnodes[nodelist[0]] = mapname(nodelist[0].name)
예제 #4
0
    'd1.names.nwk', 'd2_EDITED.names.nwk', 'e1.names.nwk', 'e2.names.nwk'
]

#newick_dir = '/Volumes/page_lab/users/lsteitz/1000_Genomes_Male_Figures/Phylogenetic_Trees/'
#newicks = ['tree.all.nwk']

# Build yvars: first whitespace-separated field of each data line -> second field.
# Lines starting with '#' are skipped. The file is opened in a `with` block so
# the handle is closed once it has been read.
yvars = {}
with open(
        '/Volumes/page_lab/users/lsteitz/1000_Genomes_Male_Figures/Phylogenetic_Trees/1000_Ys_variant_men.txt',
        'r') as yvarfile:
    for line in yvarfile:
        if line.startswith('#'):
            continue
        data = line.split()
        # Blank or single-field lines carry no key/value pair to store.
        if len(data) < 2:
            continue
        yvars[data[0]] = data[1]

varstyle = ete3.NodeStyle()
varstyle['fgcolor'] = 'red'

for newick in newicks:
    t = ete3.Tree('%s%s' % (tree_dir, newick), format=1)

    #Remove nodes not in my data
    good_leaves = []
    for leaf in t.iter_leaves():
        if leaf.name.split('_')[0] in yvars:
            good_leaves.append(leaf.name)
    t.prune(good_leaves)

    # 	for node in t.search_nodes():
    # 		nodename = node.name.split('_')[0]
    # 		if node.is_leaf():
def return_treestyle_with_columns(cmapvector):
    '''
    Build an ete3 TreeStyle that draws coloured, labelled rectangles in
    aligned columns next to every leaf, with rotated column headers.

    cmapvector is unpacked into three items:
      d_seq_color     -- indexed by leaf name, gives one colour per column
      d_seq_label     -- indexed by leaf name, gives one label per column
                         (only the first 10 characters are drawn)
      width_and_names -- sequence of (rectangle width, column name) pairs

    Need column names again to print header in order

    Returns the configured ete3.TreeStyle.
    '''
    d_seq_color, d_seq_label, width_and_names = cmapvector
    rect_width = [x[0] for x in width_and_names]
    column_names = [x[1] for x in width_and_names]
    label_font_size = 7

    def tree_profile_layout(node):
        # prepare table and other node information (local function so mind the indentation)
        # Nodes without the feature (or with an empty one) get the default colour
        # instead of raising AttributeError.
        org_code = getattr(node, "submission_org_code", None) or ""
        if "NORW" in org_code:
            this_color = "darkred"
        else:
            this_color = "#080816"

        node.img_style['hz_line_type'] = node.img_style[
            'vt_line_type'] = 0  # 0=solid, 1=dashed, 2=dotted
        node.img_style['hz_line_width'] = node.img_style['vt_line_width'] = 4
        node.img_style['hz_line_color'] = node.img_style[
            'vt_line_color'] = this_color

        if not node.is_leaf():
            node.img_style['size'] = 0
            return

        # the aligned leaf name is "column 0", thus traits go to column+1
        node.img_style['size'] = 2
        node.img_style['shape'] = "sphere"
        node.img_style['fgcolor'] = this_color
        ete3.add_face_to_node(ete3.AttrFace("name",
                                            fsize=label_font_size,
                                            text_suffix="   "),
                              node,
                              0,
                              position="aligned")
        for column, (rgb_val, lab, wdt) in enumerate(
                zip(d_seq_color[node.name], d_seq_label[node.name],
                    rect_width)):
            label = {
                "text": lab[:10],
                "color": "Black",
                "fontsize": label_font_size - 1
            }
            # Odd aligned columns hold the trait rectangle ...
            ete3.add_face_to_node(ete3.RectFace(wdt,
                                                12,
                                                fgcolor=rgb_val,
                                                bgcolor=rgb_val,
                                                label=label),
                                  node,
                                  2 * column + 1,
                                  position="aligned")
            # ... and even columns a 2px white spacer between traits.
            ete3.add_face_to_node(ete3.RectFace(2,
                                                12,
                                                fgcolor="#ffffff",
                                                bgcolor="#ffffff",
                                                label=""),
                                  node,
                                  2 * column + 2,
                                  position="aligned")

    ts = ete3.TreeStyle()
    ts.draw_guiding_lines = True  # dotted line between tip and name
    ts.guiding_lines_color = "#f4f4f4"  # "#bdb76d"
    ts.guiding_lines_type = 2  #  0=solid, 1=dashed, 2=dotted
    ts.layout_fn = tree_profile_layout
    ts.branch_vertical_margin = 0
    ts.min_leaf_separation = 1  # Min separation, in pixels, between two adjacent branches
    ts.scale = 2000000  # 2e6 pixels per branch length unit (i.e. brlen=1 should be how many pixels?)
    ts.show_scale = False
    # Previously assigned to a stray local variable, which had no effect on the style.
    ts.show_branch_length = True
    ts.show_leaf_name = False  # we handle this in the layout function

    ## STILL dont know how to do it
    #ts.legend.add_face(CircleFace(10, "red"), column=0)
    #ts.legend.add_face(TextFace("0.5 support"), column=1)
    #ts.legend_position = 3 #  TopLeft corner if 1, TopRight if 2, BottomLeft if 3, BottomRight if 4
    for col, label in enumerate(column_names):  # the first are tip labels
        labelFace = ete3.TextFace(
            label, fsize=9,
            fgcolor="DimGray")  # fsize controls interval between columns
        labelFace.rotation = 270
        labelFace.vt_align = 1  # 0 top, 1 center, 2 bottom
        labelFace.hz_align = 1  # 0 left, 1 center, 2 right
        # Headers use the same odd column numbers as the trait rectangles.
        ts.aligned_header.add_face(labelFace, 2 * col + 1)
    return ts
예제 #6
0
def _presence_square(color):
    """Return a 30x30 RectFace filled with *color*, with the margins and centring used for table cells."""
    face = e.RectFace(30, 30, color,
                      color)  # TextFace("Y",fsize=10,tight_text=True)
    face.border.margin = 5
    face.margin_right = 5
    face.margin_left = 5
    face.vt_align = 1
    # ete3 faces expose ``hz_align``; the former ``ht_align`` only set an
    # attribute that nothing reads.
    face.hz_align = 1
    return face


def Main():
    """Render the tree in ``treePath`` with an aligned metadata table per leaf.

    Reads the module-level ``sensitivePath``, ``metadataPath``,
    ``distancePath``, ``treePath`` and ``outputFile``. The leaf named
    "Reference" receives the column headers; every other leaf receives, in
    order: optional sensitive-metadata columns, sample ID, a green square if
    the sample is new, one black square per plasmid Inc type it carries,
    MLST / resistance / plasmid-match fields and its distance-matrix row.
    """
    has_sensitive = len(sensitivePath) > 0
    if has_sensitive:
        sensitive_meta_data = SensitiveMetadata()

    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)

    # store the distance matrix as rowname:list<string>
    distanceDict = {}
    for row in distance:
        fields = row.split("\t")
        distanceDict[fields[0]] = fields[1:]
    # The first row of the matrix supplies the distance column headers and
    # fixes how many distance columns each leaf gets.
    distance_header = next(iter(distanceDict.values()), [])

    #region create box tree
    #region step5: tree construction
    treeFile = "".join(read(treePath))
    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    #set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  #pixel per branch length unit
    ts.branch_vertical_margin = 15  #pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    #find the plasmid origins: Inc type -> IDs of the samples carrying it
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" not in inc.lower():
                    continue
                carriers = plasmidIncs.setdefault(inc, [])
                if sample.ID not in carriers:
                    carriers.append(sample.ID)
    # Fixed column order for the Inc types, shared by headers and leaves.
    inc_names = list(plasmidIncs)

    for n in t.traverse():  #loop through the nodes of a tree
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            #if its the reference branch, populate the faces with column headers
            headers = []
            if has_sensitive:  #sensitive metadata @ chris
                headers.extend(sensitive_meta_data.get_columns())
            headers.extend(["SampleID", "New?"])
            headers.extend(inc_names)
            headers.extend([
                "MLSTScheme", "Sequence Type", "Carbapenamases",
                "Plasmid Best Match", "Best Match Identity"
            ])
            headers.extend(distance_header)

            reference = t & "Reference"
            for column, title in enumerate(headers):
                reference.add_face(addFace(title), column, "aligned")
            continue

        #not reference branches, populate with metadata
        index = 0
        sample_key = n.name.replace(".fa", "")
        # Resolve this leaf's metadata before anything reads it; the
        # sensitive-metadata block below needs mData.ID of the current leaf.
        if sample_key in metadata:
            mData = metadata[sample_key]
        else:
            mData = metadata["na"]

        if has_sensitive:  #sensitive metadata @ chris
            # tree uses bcids like BC18A021A_S12
            # while sens meta-data uses BC18A021A
            # trim the "_S.*" if present
            bcid = str(mData.ID)
            cut = bcid.find("_S")
            if cut != -1:
                bcid = bcid[:cut]
            for sensitive_data_column in sensitive_meta_data.get_columns():
                sens_col_val = sensitive_meta_data.get_value(
                    bcid=bcid, column_name=sensitive_data_column)
                n.add_face(addFace(sens_col_val), index, "aligned")
                index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        if mData.new == True:  #new column
            n.add_face(_presence_square("green"), index, "aligned")
        index += 1

        #this loop adds presence/absence to the sample nodes
        for offset, inc in enumerate(inc_names):
            if sample_key in plasmidIncs[inc]:
                n.add_face(_presence_square("black"), index + offset,
                           "aligned")
        index += len(inc_names)

        for value in (mData.MLSTSpecies, mData.SequenceType,
                      mData.CarbapenemResistanceGenes, mData.plasmidBestMatch,
                      mData.plasmididentity):
            n.add_face(addFace(value), index, "aligned")
            index += 1

        #make sure the leaf has a row in the distance matrix
        if n.name in distanceDict:
            distance_row = distanceDict[n.name]
            for offset in range(len(distance_header)):
                n.add_face(addFace(distance_row[offset]), index + offset,
                           "aligned")

    t.render(outputFile, w=5000, units="mm",
             tree_style=ts)  #save it as a png, pdf, svg or an phyloxml
예제 #7
0
파일: domtree.py 프로젝트: sklas/pyDomrates
def print_tree(tree, outfile="tree_gain_loss_domains_20160211.pdf"):
    """Annotate *tree* with domain counts and render it to a file.

    Every node is expected to carry ``domains``, ``gained_domains`` and
    ``lost_domains`` attributes (their lengths are displayed). Leaves show
    the domain count in an aligned column; internal nodes show the count and
    the node name below the branch. Gains (green, "+") and losses (red, "-")
    are written above every branch.

    Args:
        tree: ete3 tree whose nodes carry the attributes listed above.
        outfile: Path handed to ``tree.render``; defaults to the file name
            this function always used before the parameter existed.
    """
    # set node style (one shared style object for all nodes)
    nstyle = ete3.NodeStyle()
    nstyle["shape"] = "sphere"
    nstyle["size"] = 0
    nstyle["fgcolor"] = "darkred"
    nstyle["vt_line_width"] = 7
    nstyle["hz_line_width"] = 7

    for node in tree.traverse():
        domain_count = len(node.domains)
        if node.is_leaf():
            node.add_face(ete3.TextFace(domain_count, fsize=30),
                          column=1,
                          position="aligned")
        else:
            node.add_face(ete3.TextFace(str(domain_count) + " ", fsize=25),
                          column=0,
                          position="branch-bottom")
            node.add_face(ete3.TextFace(node.name + " ", fsize=25),
                          column=0,
                          position="branch-bottom")
        node.add_face(ete3.TextFace("+" + str(len(node.gained_domains)),
                                    fgcolor="green",
                                    fsize=25),
                      column=0,
                      position="branch-top")
        node.add_face(ete3.TextFace("-" + str(len(node.lost_domains)),
                                    fgcolor="red",
                                    fsize=25),
                      column=0,
                      position="branch-top")
        node.set_style(nstyle)

    def layout(node):
        # Only leaves get a name face, so build it only for them.
        if not node.is_leaf():
            return
        name_face = ete3.AttrFace("name", fsize=30)
        name_face.margin_right = 10
        if node.name == "Orussus_abietinus":
            name_face.background.color = "lightblue"
        elif node.name == "Athalia_rosae":
            name_face.background.color = "lightgreen"
        ete3.faces.add_face_to_node(name_face, node, 0, position="aligned")

    ts = ete3.TreeStyle()
    ts.show_leaf_name = False  # leaf names are drawn by layout() instead
    ts.draw_guiding_lines = True
    # ts.guiding_lines_type = 0
    ts.extra_branch_line_color = "black"

    ts.extra_branch_line_type = 1
    ts.show_scale = False
    ts.layout_fn = layout
    ts.optimal_scale_level = "mid"

    tree.render(outfile, tree_style=ts, w=199, units="mm")
예제 #8
0
def main(treefile,
         outfile=None,
         cladelistfile=None,
         datafile=None,
         prune=False,
         log_dist=False,
         values=None,
         quoted_node_names=False,
         startmatch=False):
    """Draw a tree, highlighting listed clades and optionally adding data columns.

    Args:
        treefile: Input passed to ``ete3.Tree`` (read with ``format=1``).
        outfile: If given, the tree is rendered to this file; otherwise it is
            shown interactively.
        cladelistfile: Optional text file; the first tab-separated field of
            each line not starting with '#' is a clade name.
        datafile: Optional tab-separated table (first column is the index)
            holding one row per leaf name.
        prune: Prune the tree to the listed clades instead of highlighting.
        log_dist: Replace each branch length by a signed log10 of itself;
            the original length is kept in the ``orig_dist`` feature.
        values: Column names of ``datafile`` to display; required when
            ``datafile`` is given.
        quoted_node_names: Forwarded to ``ete3.Tree``.
        startmatch: Highlight nodes whose name starts with a listed clade
            name instead of requiring an exact match.

    Side effects: sets the module-level ``tot_clades`` counter and prints it.
    """
    global tot_clades
    tot_clades = 0

    tree = ete3.Tree(treefile, format=1, quoted_node_names=quoted_node_names)

    cladelist = set()
    if cladelistfile:
        with open(cladelistfile) as f:
            for line in f:
                if line.startswith('#'):
                    continue
                clade = line.rstrip().split('\t')[0]
                # Skip empty entries (blank lines, leading tab): an empty
                # name would match every node under `startmatch` and every
                # unnamed node otherwise.
                if clade:
                    cladelist.add(clade)

    if datafile:
        assert values, "`values` is required when `datafile` is given"
        data = pd.read_csv(datafile, sep='\t', index_col=0, header=0)
        assert all(val in data.columns for val in values), \
            "every entry of `values` must be a column of `datafile`"
        value_face = {
            val: ete3.AttrFace(val,
                               formatter=' %.2f ',
                               fgcolor='grey',
                               fsize=8)
            for val in values
        }
        max_val = data[values].max().max()
        min_val = data[values].min().min()
        average_val = data[values].mean().mean()

    alt_col = ['#7FD09C', '#4cbdad']

    ns = [ete3.NodeStyle(bgcolor=col) for col in alt_col]
    default_ns = ete3.NodeStyle(size=0)
    labelface = ete3.AttrFace('name')

    # Highlighting is disabled altogether when pruning.
    if prune:

        def test(node):
            return False
    elif startmatch:
        prefixes = tuple(cladelist)

        def test(node):
            return node.name.startswith(prefixes)
    else:

        def test(node):
            return node.name in cladelist

    def mylayout(node):
        global tot_clades
        # Transform each branch length once only: the `orig_dist` feature
        # marks nodes already processed, so a second layout pass does not
        # take the log of a log.
        if log_dist and 'orig_dist' not in node.features:
            node.add_feature('orig_dist', node.dist)
            if node.dist > 0:
                node.dist = log10(node.dist)
            elif node.dist < 0:
                node.dist = -log10(-node.dist)
            else:
                node.dist = 0

        if test(node):
            node.set_style(ns[0])
            ns.insert(0, ns.pop())  #Cycle through colors
            if not node.is_leaf():
                ete3.add_face_to_node(labelface,
                                      node,
                                      column=0,
                                      position='branch-right')
            tot_clades += 1
        else:
            node.set_style(default_ns)

        if datafile and node.is_leaf():
            node.add_feature('profile',
                             [data[val][node.name] for val in values])
            node.add_feature('deviation', [0.0] * len(values))
            heatface = ete3.ProfileFace(max_val,
                                        min_val,
                                        average_val,
                                        width=20 * len(values),
                                        height=20,
                                        style='heatmap')
            ete3.add_face_to_node(heatface, node, column=1, aligned=True)

            # Column 1 holds the heatmap; numeric values start at column 2.
            for i, val in enumerate(values, start=2):
                node.add_feature(val, data[val][node.name])
                ete3.add_face_to_node(value_face[val],
                                      node,
                                      column=i,
                                      aligned=True)

    if prune:
        tree.prune(cladelist, preserve_branch_length=True)
        tree.dist = 0

    if outfile:
        tree.render(outfile, mylayout)
    else:
        tree.show(layout=mylayout)

    # Print summary
    print("Found %d clades" % tot_clades)
예제 #9
0
파일: plottree.py 프로젝트: A-Farhan/cova
def main_fun(dr, ftree, fplot, fst, fld, typef, branch_scale, branch_support,
             show_legend, legend_box_size, max_legend_stack, legend_font_size,
             img_height, img_dpi, show, typecoldict):
    """Plot a circular tree annotated by sequence type and, optionally, location/month.

    Args:
        dr: Directory that ``ftree``, ``fst`` and ``fld`` are joined to.
        ftree: Tree file name.
        fplot: Output path; must end in "png". When ``None`` it is derived
            from the tree path by replacing '.nwk' with '.png'.
        fst: File name of the table of genomes and their sequence types.
        fld: Optional CSV file name with 'accession', 'location' and 'date'
            columns; when empty or ``None`` only sequence types are annotated.
        typef: Fraction of all typed genomes below which a type is pooled
            into 'O' (others).
        branch_scale: Value for ``TreeStyle.scale``; also scales the boxes
            drawn for location and month.
        branch_support: Value for ``TreeStyle.show_branch_support``.
        show_legend: Draw a legend of sequence-type colours when truthy.
        legend_box_size, max_legend_stack, legend_font_size: Legend layout.
        img_height, img_dpi: Passed to ``Tree.render``.
        show: Selects between rendering to ``fplot`` and opening the viewer
            (see the note at the end of the function).
        typecoldict: Optional comma-separated "type,color,type,color,..."
            string overriding the generated type colours.

    Raises:
        FileNotFoundError: If the tree file or the location file is missing.
        ValueError: If ``fplot`` does not have the "png" suffix.
    """
    ## paths
    ftree = os.path.join(dr, ftree)
    if fplot is None:
        fplot = ftree.replace('.nwk', '.png')
    fst = os.path.join(dr, fst)

    ## checks
    # tree file is present
    if not os.path.exists(ftree):
        raise FileNotFoundError('tree file %s must be present.' % ftree)

    # should you proceed if the output path already exists
    if not utils.outcheck(fplot):
        return

    # plot file has png suffix
    if fplot.split('.')[-1] != 'png':
        raise ValueError('output file must have suffix "png".')

    # if info file ( for sequence types) is provided, it is a valid file path
    if fld:
        click.echo("Location file provided.")
        fld = os.path.join(dr, fld)

        if not os.path.exists(fld):
            raise FileNotFoundError("couldn't find the file %s." % fld)

    else:
        click.echo(
            "No location file! Annotation will only be for sequence types")

    # load tree
    t = ete3.Tree(ftree)
    # leaf names, as a set because they are only used for membership tests
    leaves = set(t.get_leaf_names())

    ## create treestyle
    ts = ete3.TreeStyle()
    ts.show_branch_support = branch_support
    ts.mode = "c"
    ts.scale = branch_scale

    ### types #####################################
    # table of genomes and their sequence types
    typedata = utils.readcsv(fst)
    # threshold for a type to be shown explicitly in the figure
    th = len(typedata) * typef
    # dict of sequence type with isolates
    type_isols = utils.split_data(data=typedata, ix=1, cixs=0)
    # types to be removed, and the isolates belonging to them
    rmkeys = []
    minors = []

    for k, v in type_isols.items():
        # the unknown type is never pooled
        if k == 'U':
            continue

        # types with fewer isolates than the threshold are pooled as minors
        if len(v) < th:
            minors.extend(v)
            rmkeys.append(k)

    # type isolate dict with low representation types excluded
    type_isols = {k: v for k, v in type_isols.items() if k not in rmkeys}
    # and added back as minors under type 'O'thers
    type_isols['O'] = minors
    # modified table of genome and types
    typedata = [[i, k] for k, v in type_isols.items() for i in v]
    # dict of isolate and its type if the isolate is present on the tree
    isol_type = {i[0]: i[1] for i in typedata if i[0] in leaves}
    # color representation of types
    isol_type_color, type_color = colbyinfo(infodict=isol_type,
                                            sorting_func=typesortingfunc)

    # if a color dict was explicitly provided
    if typecoldict is not None:
        tcl = typecoldict.split(',')
        type_color = {tcl[x]: tcl[x + 1] for x in range(0, len(tcl), 2)}
        isol_type_color = {k: type_color[v] for k, v in isol_type.items()}

    # Unknown and pooled types always use fixed colours. Both lookups are
    # guarded because either type may be absent from the colour table.
    for k, v in isol_type_color.items():

        if 'U' in type_color and v == type_color['U']:
            isol_type_color[k] = 'white'

        if 'O' in type_color and v == type_color['O']:
            isol_type_color[k] = 'grey'

    type_color['U'] = 'white'
    type_color['O'] = 'grey'
    ###############################################

    # basic tree style with type annotation
    # (iterate over a snapshot because nodes are deleted inside the loop)
    for n in list(t.traverse()):

        # if branch support is less than 0.5, delete the branch
        if n.support < 0.5:
            n.delete()
            continue

        n.dist = 0.1
        ns = ete3.NodeStyle()
        if n.is_leaf():
            ns['size'] = 10
            ns['bgcolor'] = isol_type_color.get(n.name, 'grey')
        else:
            ns['size'] = 0
        n.set_style(ns)

    # If mapping is available, then use it to color leaves and branches.
    # Same truthiness test as the path check above, so an empty string is
    # treated as "no location file" in both places.
    if fld:
        dmap = pandas.read_csv(fld)
        nrow = len(dmap)

        # colors for locations
        isol_loc = {
            dmap.at[x, 'accession']: dmap.at[x, 'location']
            for x in range(nrow)
        }
        isol_loc_color, loc_color = colbyinfo(infodict=isol_loc)

        # "YYYY-MM"-style prefix of every date that has exactly two dashes
        isol_month = {
            dmap.at[x, 'accession']: '-'.join(dmap.at[x,
                                                      'date'].split('-')[:2])
            for x in range(nrow) if dmap.at[x, 'date'].count('-') == 2
        }
        # months, in sorted order, numbered from 1
        months = sorted(set(isol_month.values()))
        month_key = {}

        for x, i in enumerate(months):
            month_key[i] = x + 1

        # replace months with key in the above
        isol_mkey = {k: month_key[v] for k, v in isol_month.items()}
        months = sorted(set(isol_mkey.values()))
        nm = len(months)
        month_colors = seaborn.color_palette('Blues', n_colors=nm)
        isol_month_color = {}

        for k, v in isol_mkey.items():
            x = months.index(v)
            c = month_colors[x]
            isol_month_color[k] = matplotlib.colors.to_hex(c)

        boxsize = 10 * branch_scale / 100

        for n in t.traverse():

            if n.name not in isol_loc_color:
                continue

            if n.is_leaf():
                # column 2: location box, column 3: month box (if dated)
                rct1 = ete3.RectFace(width=boxsize,
                                     height=boxsize,
                                     fgcolor='',
                                     bgcolor=isol_loc_color[n.name])
                n.add_face(rct1, column=2, position='aligned')
                if n.name in isol_month_color:
                    rct2 = ete3.RectFace(width=boxsize,
                                         height=boxsize,
                                         fgcolor='',
                                         bgcolor=isol_month_color[n.name])
                    n.add_face(rct2, column=3, position='aligned')
            else:
                n.img_style['size'] = 0

    ### legend ##################################
    if show_legend:
        ts.legend_position = 3
        stack_size = 0
        colx = 0

        for k, v in type_color.items():

            rct = ete3.RectFace(legend_box_size, legend_box_size, '', v)
            rct.margin_left = 10
            rct.margin_right = 10
            txt = ete3.TextFace(k, fsize=legend_font_size)
            txt.margin_left = 10
            txt.margin_right = 10

            # start a new pair of legend columns once the stack is full
            if stack_size > max_legend_stack:
                stack_size = 0
                colx += 2

            if stack_size == 0:
                rct.margin_top = 20

            ts.legend.add_face(rct, column=colx)
            ts.legend.add_face(txt, column=colx + 1)
            stack_size += legend_box_size
    ###############################################

    ## output
    # NOTE(review): the file is rendered when `show` is not None and the
    # interactive viewer opens only when it is None; this looks inverted
    # relative to the parameter name -- confirm against the CLI definition.
    if show is not None:
        t.render(fplot, tree_style=ts, units='px', h=img_height, dpi=img_dpi)
        click.echo("{}: Tree plotting complete. Output was saved in {}".format(
            utils.timer(start), fplot))
    else:
        t.show(tree_style=ts)
    ################
예제 #10
0
파일: cli.py 프로젝트: matsengrp/gctree
def simulate(args):
    """Simulation subprogram.

    Simulates a Galton–Watson process, with mutation probabilities
    according to a user defined motif model e.g. S5F, then writes the
    simulated sequences, summary statistics and tree renderings to files
    whose names start with ``args.outbase``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``seed``, ``mutability``, ``substitution``, ``lambda0``,
            ``lambda_``, ``sequence``, ``sequence2``, ``frame``, ``n``,
            ``N``, ``T``, ``verbose``, ``outbase``, ``plotAA`` and
            ``idlabel``. ``lambda0`` and ``sequence`` are overwritten on
            ``args`` (and ``sequence2`` when it is truncated to frame 1).

    Raises:
        Exception: If ``sequence2`` is given and ``lambda0`` has neither
            one nor two entries.
        RuntimeError: If every one of the 1000 simulation attempts fails
            to give a collapsed tree with at least two sampled genotypes.
    """
    random.seed(a=args.seed)
    mutation_model = mm.MutationModel(args.mutability, args.substitution)
    if args.lambda0 is None:
        args.lambda0 = [max([1, int(0.01 * len(args.sequence))])]
    args.sequence = args.sequence.upper()
    if args.sequence2 is not None:
        # Use the same mutation rate on both sequences
        if len(args.lambda0) == 1:
            args.lambda0 = [args.lambda0[0], args.lambda0[0]]
        elif len(args.lambda0) != 2:
            raise Exception("Only one or two lambda0 can be defined for a two "
                            "sequence simulation.")
        # Require both sequences to be in frame 1:
        if args.frame is not None and args.frame != 1:
            if args.verbose:
                print("Warning: When simulating with two sequences they are "
                      "truncated to be beginning at frame 1.")
            offset = args.frame - 1

            def in_frame(seq):
                # Drop the bases before the frame start, then trim the tail
                # so that the remaining length is a multiple of three.
                return seq[offset:offset + 3 * ((len(seq) - offset) // 3)]

            args.sequence = in_frame(args.sequence)
            args.sequence2 = in_frame(args.sequence2)
        # Extract the bounds between sequence 1 and 2:
        seq_bounds = (
            (0, len(args.sequence)),
            (len(args.sequence), len(args.sequence) + len(args.sequence2)),
        )
        # Merge the two sequences to simplify future dealing with the pair:
        args.sequence += args.sequence2
    else:
        seq_bounds = None

    trials = 1000
    # this loop makes us resimulate if size too small, or backmutation
    for _ in range(trials):
        try:
            tree = mutation_model.simulate(
                args.sequence,
                seq_bounds=seq_bounds,
                progeny=lambda seq: args.lambda_,
                lambda0=args.lambda0,
                n=args.n,
                N=args.N,
                T=args.T,
                frame=args.frame,
                verbose=args.verbose,
            )
            # this will fail if backmutations
            collapsed_tree = bp.CollapsedTree(tree=tree)
            tree.ladderize()
            uniques = sum(node.abundance > 0
                          for node in collapsed_tree.tree.traverse())
            if uniques < 2:
                raise RuntimeError(f"collapsed tree contains {uniques} "
                                   "sampled sequences")
            break
        except RuntimeError as e:
            print(f"{e}, trying again")
    else:
        # Only reached when no attempt hit ``break``; a success on the very
        # last attempt must not be reported as a failure.
        raise RuntimeError(f"{trials} attempts exceeded")

    # In the case of a sequence pair print them to separate files:
    if args.sequence2 is not None:
        (start1, end1), (start2, end2) = seq_bounds
        with open(args.outbase + ".simulation_seq1.fasta", "w") as fh1, \
                open(args.outbase + ".simulation_seq2.fasta", "w") as fh2:
            fh1.write(">root\n")
            fh1.write(args.sequence[start1:end1] + "\n")
            fh2.write(">root\n")
            fh2.write(args.sequence[start2:end2] + "\n")
            for leaf in tree.iter_leaves():
                if leaf.abundance != 0:
                    fh1.write(">" + leaf.name + "\n")
                    fh1.write(leaf.sequence[start1:end1] + "\n")
                    fh2.write(">" + leaf.name + "\n")
                    fh2.write(leaf.sequence[start2:end2] + "\n")
    else:
        with open(args.outbase + ".simulation.fasta", "w") as f:
            f.write(">root\n")
            f.write(args.sequence + "\n")
            for leaf in tree.iter_leaves():
                if leaf.abundance != 0:
                    f.write(">" + leaf.name + "\n")
                    f.write(leaf.sequence + "\n")

    # some observable simulation stats to write
    abundance, distance_from_root, degree = zip(*[(
        node.abundance,
        utils.hamming_distance(node.sequence, args.sequence),
        sum(
            utils.hamming_distance(node.sequence, node2.sequence) == 1
            for node2 in collapsed_tree.tree.traverse()
            if node2.abundance and node2 is not node),
    ) for node in collapsed_tree.tree.traverse() if node.abundance])
    stats = pd.DataFrame({
        "genotype abundance": abundance,
        "Hamming distance to root genotype": distance_from_root,
        "Hamming neighbor genotypes": degree,
    })
    stats.to_csv(args.outbase + ".simulation.stats.tsv", sep="\t", index=False)

    print(f"{sum(leaf.abundance for leaf in collapsed_tree.tree.traverse())}"
          " simulated observed sequences")

    # render the full lineage tree
    ts = ete3.TreeStyle()
    ts.rotation = 90
    ts.show_leaf_name = False
    ts.show_scale = False

    colors = {}
    # Build a new set rather than using ``-=`` so that the module-level
    # ete3.SVG_COLORS set is not modified in place.
    palette = ete3.SVG_COLORS - {"black", "white", "gray"}
    palette = itertools.cycle(list(palette))  # <-- circular iterator

    colors[tree.sequence] = "gray"

    for n in tree.traverse():
        nstyle = ete3.NodeStyle()
        nstyle["size"] = 10
        # One color per distinct amino-acid or nucleotide sequence.
        color_key = n.AAseq if args.plotAA else n.sequence
        if color_key not in colors:
            colors[color_key] = next(palette)
        nstyle["fgcolor"] = colors[color_key]
        n.set_style(nstyle)

    # this makes the rendered branch lengths correspond to time
    for node in tree.iter_descendants():
        node.dist = node.time - node.up.time
    tree.render(args.outbase + ".simulation.lineage_tree.svg", tree_style=ts)

    # render collapsed tree
    # create an id-wise colormap
    # NOTE: node.name can be a set
    # NOTE(review): when args.plotAA is set, ``colors`` is keyed by AAseq
    # (plus the root nucleotide sequence), so this nucleotide-sequence lookup
    # may raise KeyError -- confirm the intended behavior.
    colormap = {
        node.name: colors[node.sequence]
        for node in collapsed_tree.tree.traverse()
    }
    collapsed_tree.write(args.outbase + ".simulation.collapsed_tree.p")
    collapsed_tree.render(
        args.outbase + ".simulation.collapsed_tree.svg",
        idlabel=args.idlabel,
        colormap=colormap,
        frame=args.frame,
    )
    # print colormap to file
    with open(args.outbase + ".simulation.collapsed_tree.colormap.tsv",
              "w") as f:
        for name, color in colormap.items():
            f.write((name if isinstance(name, str) else ",".join(name)) +
                    "\t" + color + "\n")
예제 #11
0
def main(term, outbase=None, outfmt=None, nhx=False, show_img=False, recurs=0):
    """Fetch the phylogenetic trees found for `term` and output each one.

    Args:
        term: Search term passed to `get_wiki_tree`.
        outbase: Base path of the output files; ``-<tree index>`` and the
            format extension are appended.  When falsy, Newick text is
            printed to stdout and graphics formats are not rendered.
        outfmt: Collection of output formats among 'nwk', 'ascii', 'png',
            'jpg', 'svg' and 'pdf'.  When empty or None, each tree is
            displayed with ``tree.show`` instead.
        nhx: If true, the Newick output includes the 'support', 'info',
            'link' and 'img' node features.
        show_img: If true, an image face is added for nodes that carry a
            non-empty ``img`` attribute.
        recurs: Passed through unchanged to `build_tree`.
    """
    graphics_fmts = set(outfmt).intersection(('png', 'jpg', 'svg', 'pdf')) \
                    if outfmt is not None else set()

    # The layout is needed both for rendering graphics files and for the
    # interactive ``tree.show`` fallback taken whenever `outfmt` is falsy
    # (regardless of `outbase`).  Define it only in those cases, so that
    # you can fall back on text methods when PyQt is not installed.
    if graphics_fmts or not outfmt:
        if show_img:
            #async def?
            def add_img(node):
                nodeimg = getattr(node, 'img', None)
                if nodeimg:
                    # Protocol-relative URL: give it an explicit scheme.
                    if nodeimg.startswith('//'):
                        nodeimg = 'https:' + nodeimg
                    #await ?
                    ete3.add_face_to_node(ete3.ImgFace(
                        nodeimg,
                        width=int(node.imgwidth),
                        height=int(node.imgheight),
                        is_url=True),
                                          node,
                                          column=1,
                                          position='branch-right')
        else:

            def add_img(node):
                pass

        ns = ete3.NodeStyle(size=0)
        dashed_branch = ete3.NodeStyle(size=0, hz_line_type=1)

        def mylayout(node):
            node.set_style(ns)
            if not node.is_leaf():
                # Internal nodes: name above the branch, info lines below.
                ete3.add_face_to_node(ete3.TextFace(node.name),
                                      node,
                                      column=0,
                                      position='branch-top')
                ete3.add_face_to_node(ete3.TextFace('\n'.join(
                    getattr(node, 'info', []))),
                                      node,
                                      column=0,
                                      position='branch-bottom')
            if node.support <= 0.5:
                node.set_style(dashed_branch)
            add_img(node)

    treesoups = get_wiki_tree(term)
    logger.info("Found %d phylogenetic trees", len(treesoups))
    if outfmt:

        def numbered_base(i):
            # Format the index separately from `outbase`, so that a '%'
            # inside `outbase` is not interpreted as a format directive.
            return '%s-%d' % (outbase, i)

        outputfuncs = []
        if 'nwk' in outfmt:
            features = ['support', 'info', 'link', 'img'] if nhx else None

            def output_newick(tree, i):
                # format 8: all names
                outfile = (numbered_base(i) + '.nwk') if outbase else None
                txt = tree.write(outfile=outfile,
                                 format=8,
                                 format_root_node=True,
                                 features=features)
                if txt is not None:
                    print(txt)

            outputfuncs.append(output_newick)
        if 'ascii' in outfmt:
            # Always to stdout
            def output_ascii(tree, i):
                print(tree.get_ascii())

            outputfuncs.append(output_ascii)
        if graphics_fmts and outbase:

            def output_graphics(tree, i):
                for fmt in graphics_fmts:
                    tree.render(numbered_base(i) + '.' + fmt, mylayout)

            outputfuncs.append(output_graphics)

        def outputs(tree, i):
            for outfunc in outputfuncs:
                outfunc(tree, i)
    else:

        def outputs(tree, i):
            tree.show(mylayout, name=('Tree n°%d: %s' % (i, tree.name)))

    for i, treesoup in enumerate(treesoups):
        # build_tree is expected to yield exactly one tree here; the
        # single-element unpacking raises ValueError otherwise.
        tree, = build_tree(treesoup, recurs)
        outputs(tree, i)
예제 #12
0
    '../rooted_partitions-with_named_branches.treefile', format=1)
# Transfer events to highlight, keyed by a six-digit identifier string.
# NOTE(review): presumably gene-family ids -- confirm against the tool that
# produced these reconciliation strings.
final_transfers = {
    '003575': [
        'm83 = LCA[GCA000012905_ABA79637, GCA001459775_CUU43052]: Transfer, Mapping --> n262, Recipient --> n297'
    ],
    '004119': [
        'm16 = LCA[GCA000019945_ACB79599, GCA900100665_SDG33339]: Transfer, Mapping --> n134, Recipient --> n293'
    ],
}

# Genome metadata indexed by column 31; each index label is normalized by
# removing underscores and dropping everything from the first '.' onwards.
genome_table = pd.read_table('../selected_genomes.tab', index_col=31)
genome_table.index = [
    label.replace('_', '').partition('.')[0] for label in genome_table.index
]

# Reticulation branches: thick red lines, with vertical line type 1 and
# horizontal line type 0.  "fgcolor" and "size" keep their default values.
reticulation_style = ete3.NodeStyle(
    vt_line_color="#ff0000",
    hz_line_color="#ff0000",
    vt_line_width=5,
    hz_line_width=5,
    vt_line_type=1,
    hz_line_type=0,
)

# Donor and recipient nodes differ from the default style only by their
# background color.
donor_style = ete3.NodeStyle(bgcolor='LightSteelBlue')
recipient_style = ete3.NodeStyle(bgcolor='DarkSeaGreen')
예제 #13
0
    def run_FastTree(self, ctx, params):
        """
        Method for Tree building of either DNA or PROTEIN sequences
        **
        **        input_type: MSA
        **        output_type: Tree
        :param params: instance of type "FastTree_Params" (FastTree Input
           Params) -> structure: parameter "workspace_name" of type
           "workspace_name" (** The workspace object refs are of form: ** ** 
           objects = ws.get_objects([{'ref':
           params['workspace_id']+'/'+params['obj_name']}]) ** ** "ref" means
           the entire name combining the workspace id and the object name **
           "id" is a numerical identifier of the workspace or object, and
           should just be used for workspace ** "name" is a string identifier
           of a workspace or object.  This is received from Narrative.),
           parameter "desc" of String, parameter "input_ref" of type
           "data_obj_ref", parameter "output_name" of type "data_obj_name",
           parameter "species_tree_flag" of Long, parameter "intree_ref" of
           type "data_obj_ref", parameter "fastest" of Long, parameter
           "pseudo" of Long, parameter "gtr" of Long, parameter "wag" of
           Long, parameter "noml" of Long, parameter "nome" of Long,
           parameter "cat" of Long, parameter "nocat" of Long, parameter
           "gamma" of Long
        :returns: instance of type "FastTree_Output" (FastTree Output) ->
           structure: parameter "report_name" of type "data_obj_name",
           parameter "report_ref" of type "data_obj_ref", parameter
           "output_ref" of type "data_obj_ref"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN run_FastTree

        # init
        #
        dfu = DFUClient(self.callbackURL)
        console = []
        invalid_msgs = []
        self.log(console, 'Running run_FastTree with params=')
        self.log(console, "\n" + pformat(params))
        report = ''
        #        report = 'Running run_FastTree with params='
        #        report += "\n"+pformat(params)

        #### do some basic checks
        #
        if 'workspace_name' not in params:
            raise ValueError('workspace_name parameter is required')
        if 'input_ref' not in params:
            raise ValueError('input_ref parameter is required')
        if 'output_name' not in params:
            raise ValueError('output_name parameter is required')

        #### Get the input_ref MSA object
        ##
        try:
            ws = workspaceService(self.workspaceURL, token=ctx['token'])
            objects = ws.get_objects([{'ref': params['input_ref']}])
            data = objects[0]['data']
            info = objects[0]['info']
            input_name = info[1]
            input_type_name = info[2].split('.')[1].split('-')[0]

        except Exception as e:
            raise ValueError(
                'Unable to fetch input_ref object from workspace: ' + str(e))
            #to get the full stack trace: traceback.format_exc()

        if input_type_name == 'MSA':
            MSA_in = data
            # DEBUG
            #for field in MSA_in.keys():
            #    self.log(console, "MSA key: '"+field+"'")
            row_order = []
            default_row_labels = dict()
            if 'row_order' in MSA_in:
                row_order = MSA_in['row_order']
            else:
                row_order = sorted(MSA_in['alignment'].keys())

            if 'default_row_labels' in MSA_in:
                default_row_labels = MSA_in['default_row_labels']
            else:
                for row_id in row_order:
                    default_row_labels[row_id] = row_id
            if len(row_order) < 2:
                self.log(
                    invalid_msgs, "must have multiple records in MSA: " +
                    params['input_ref'])
            # DEBUG
            #for row_id in row_order:
            #    self.log(console, "row_id: '"+row_id+"' default_row_label: '"+default_row_labels[row_id]+"'")

            # export features to FASTA file
            new_ids = dict()
            input_MSA_file_path = os.path.join(self.scratch,
                                               input_name + ".fasta")
            self.log(console, 'writing fasta file: ' + input_MSA_file_path)
            records = []
            for row_id in row_order:
                # take care of characters that will mess up newick and/or fasttree
                row_id_disp = re.sub('\s', '_', row_id)
                row_id_disp = re.sub('\/', '%' + '/'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub(r'\\', '%' + '\\'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\(', '%' + '('.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\)', '%' + ')'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\[', '%' + '['.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\]', '%' + ']'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\:', '%' + ':'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\;', '%' + ';'.encode("hex"),
                                     row_id_disp)
                row_id_disp = re.sub('\|', '%' + ';'.encode("hex"),
                                     row_id_disp)
                new_ids[row_id] = row_id_disp

                #self.log(console,"row_id: '"+row_id+"' row_id_disp: '"+row_id_disp+"'")  # DEBUG
                #self.log(console,"alignment: '"+MSA_in['alignment'][row_id]+"'")  # DEBUG
                # using SeqIO makes multiline sequences.  FastTree doesn't like
                #record = SeqRecord(Seq(MSA_in['alignment'][row_id]), id=row_id, description=default_row_labels[row_id])
                #records.append(record)
                #SeqIO.write(records, input_MSA_file_path, "fasta")
                #records.extend(['>'+row_id,
                records.extend(
                    ['>' + row_id_disp, MSA_in['alignment'][row_id]])
            with open(input_MSA_file_path, 'w', 0) as input_MSA_file_handle:
                input_MSA_file_handle.write("\n".join(records) + "\n")

            # DEBUG
            #self.log(console, "MSA INPUT:")
            #self.log(console, "\n".join(records)+"\n")  # DEBUG

            # Determine whether nuc or protein sequences
            #
            NUC_MSA_pattern = re.compile(
                "^[\.\-_ACGTUXNRYSWKMBDHVacgtuxnryswkmbdhv \t\n]+$")
            all_seqs_nuc = True
            for row_id in row_order:
                #self.log(console, row_id+": '"+MSA_in['alignment'][row_id]+"'")
                if NUC_MSA_pattern.match(MSA_in['alignment'][row_id]) == None:
                    all_seqs_nuc = False
                    break

        # Missing proper input_type
        #
        else:
            raise ValueError('Cannot yet handle input_name type of: ' +
                             type_name)

        # Get start tree (if any)
        #
        if 'intree_ref' in params and params['intree_ref'] != None and params[
                'intree_ref'] != '':
            try:
                ws = workspaceService(self.workspaceURL, token=ctx['token'])
                objects = ws.get_objects([{'ref': params['intree_ref']}])
                data = objects[0]['data']
                info = objects[0]['info']
                intree_name = info[1]
                intree_type_name = info[2].split('.')[1].split('-')[0]

            except Exception as e:
                raise ValueError(
                    'Unable to fetch intree_ref object from workspace: ' +
                    str(e))
                #to get the full stack trace: traceback.format_exc()

            if intree_type_name == 'Tree':
                tree_in = data
                intree_newick_file_path = os.path.join(self.scratch,
                                                       intree_name + ".newick")
                self.log(console,
                         'writing intree file: ' + intree_newick_file_path)
                intree_newick_file_handle = open(intree_newick_file_path, 'w',
                                                 0)
                intree_newick_file_handle.write(tree_in['tree'])
                intree_newick_file_handle.close()
            else:
                raise ValueError('Cannot yet handle intree type of: ' +
                                 type_name)

        # DEBUG: check the MSA file contents
#        with open(input_MSA_file_path, 'r', 0) as input_MSA_file_handle:
#            for line in input_MSA_file_handle:
#                #self.log(console,"MSA_LINE: '"+line+"'")  # too big for console
#                self.log(invalid_msgs,"MSA_LINE: '"+line+"'")

# validate input data
#
        if len(invalid_msgs) > 0:

            # load the method provenance from the context object
            self.log(console, "SETTING PROVENANCE")  # DEBUG
            provenance = [{}]
            if 'provenance' in ctx:
                provenance = ctx['provenance']
            # add additional info to provenance here, in this case the input data object reference
            provenance[0]['input_ws_objects'] = []
            provenance[0]['input_ws_objects'].append(params['input_ref'])
            if 'intree_ref' in params and params[
                    'intree_ref'] != None and params['intree_ref'] != '':
                provenance[0]['input_ws_objects'].append(params['intree_ref'])
            provenance[0]['service'] = 'kb_fasttree'
            provenance[0]['method'] = 'run_FastTree'

            # report
            report += "FAILURE\n\n" + "\n".join(invalid_msgs) + "\n"
            reportObj = {'objects_created': [], 'text_message': report}

            reportName = 'fasttree_report_' + str(uuid.uuid4())
            report_obj_info = ws.save_objects({
                #                'id':info[6],
                'workspace':
                params['workspace_name'],
                'objects': [{
                    'type': 'KBaseReport.Report',
                    'data': reportObj,
                    'name': reportName,
                    'meta': {},
                    'hidden': 1,
                    'provenance': provenance
                }]
            })[0]

            self.log(console, "BUILDING RETURN OBJECT")
            returnVal = {
                'report_name':
                reportName,
                'report_ref':
                str(report_obj_info[6]) + '/' + str(report_obj_info[0]) + '/' +
                str(report_obj_info[4]),
                'output_ref':
                None
            }
            self.log(console, "run_FastTree DONE")
            return [returnVal]

        ### Construct the command
        #
        #  e.g. fasttree -in <fasta_in> -out <fasta_out> -maxiters <n> -haxours <h>
        #
        fasttree_cmd = [self.FASTTREE_bin]
        #        fasttree_cmd = []  # DEBUG

        # check for necessary files
        if not os.path.isfile(self.FASTTREE_bin):
            raise ValueError("no such file '" + self.FASTTREE_bin + "'")
        if not os.path.isfile(input_MSA_file_path):
            raise ValueError("no such file '" + input_MSA_file_path + "'")
        if not os.path.getsize(input_MSA_file_path) > 0:
            raise ValueError("empty file '" + input_MSA_file_path + "'")

        # DEBUG
#        with open(input_MSA_file_path,'r',0) as input_MSA_file_handle:
#            for line in input_MSA_file_handle:
#                #self.log(console,"MSA LINE: '"+line+"'")  # too big for console
#                self.log(invalid_msgs,"MSA LINE: '"+line+"'")

# set the output path
        timestamp = int(
            (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
            * 1000)
        output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        output_newick_file_path = os.path.join(
            output_dir, params['output_name'] + '.newick')

        # This doesn't work for some reason
        #        fasttree_cmd.append('-out')
        #        fasttree_cmd.append(output_newick_file_path)

        # options
        #fasttree_cmd.append('-quiet')
        fasttree_cmd.append('-nopr')
        if 'fastest' in params and params['fastest'] != None and params[
                'fastest'] != 0:
            fasttree_cmd.append('-fastest')
        if 'pseudo' in params and params['pseudo'] != None and params[
                'pseudo'] != 0:
            fasttree_cmd.append('-pseudo')
        if 'intree_ref' in params and params['intree_ref'] != None and params[
                'intree_ref'] != '':
            fasttree_cmd.append('-intree')
            fasttree_cmd.append(intree_newick_file_path)
        if all_seqs_nuc and 'gtr' in params and params[
                'gtr'] != None and params['gtr'] != 0:
            fasttree_cmd.append('-gtr')
        if not all_seqs_nuc and 'wag' in params and params[
                'wag'] != None and params['wag'] != 0:
            fasttree_cmd.append('-wag')
        if 'noml' in params and params['noml'] != None and params['noml'] != 0:
            fasttree_cmd.append('-noml')
        if 'nome' in params and params['nome'] != None and params['nome'] != 0:
            fasttree_cmd.append('-nome')
        if 'nocat' in params and params['nocat'] != None and params[
                'nocat'] != 0:
            fasttree_cmd.append('-nocat')
        elif not all_seqs_nuc and 'cat' in params and params[
                'cat'] != None and params['cat'] > 0:
            # DEBUG
            #        elif 'cat' in params and params['cat'] != None and params['cat'] > 0:
            fasttree_cmd.append('-cat')
            fasttree_cmd.append(str(params['cat']))
        if 'gamma' in params and params['gamma'] != None and params[
                'gamma'] != 0:
            fasttree_cmd.append('-gamma')

        if all_seqs_nuc:
            fasttree_cmd.append('-nt')

        # better (meaning it works) to write MSA to STDIN (below)
#        fasttree_cmd.append('<')
#        fasttree_cmd.append(input_MSA_file_path)
        fasttree_cmd.append('>')
        fasttree_cmd.append(output_newick_file_path)

        # Run FASTTREE, capture output as it happens
        #
        self.log(console, 'RUNNING FASTTREE:')
        self.log(console, '    ' + ' '.join(fasttree_cmd))
        #        self.log(console, '    '+self.FASTTREE_bin+' '+' '.join(fasttree_cmd))
        #        report += "\n"+'running FASTTREE:'+"\n"
        #        report += '    '+' '.join(fasttree_cmd)+"\n"

        # FastTree requires shell=True in order to see input data
        env = os.environ.copy()
        #        p = subprocess.Popen(fasttree_cmd, \
        joined_fasttree_cmd = ' '.join(
            fasttree_cmd
        )  # redirect out doesn't work with subprocess unless you join command first
        p = subprocess.Popen([joined_fasttree_cmd], \
                             cwd = self.scratch, \
                             stdin = subprocess.PIPE, \
                             stdout = subprocess.PIPE, \
                             stderr = subprocess.PIPE, \
                             shell = True, \
                             env = env)
        #                             stdout = subprocess.PIPE, \
        #                             executable = '/bin/bash' )

        #        p = subprocess.Popen(fasttree_cmd, \
        #                             cwd = self.scratch, \
        #                             stdout = subprocess.PIPE, \
        #                             stderr = subprocess.STDOUT, \
        #                             shell = True, \
        #                             env = env, \
        #                             executable = self.FASTTREE_bin )

        #                             shell = True, \  # seems necessary?
        #                            stdout = subprocess.PIPE, \
        #                             stdout = output_newick_file_path, \

        # write MSA to process for FastTree
        #
        with open(input_MSA_file_path, 'r', 0) as input_MSA_file_handle:
            for line in input_MSA_file_handle:
                p.stdin.write(line)
        p.stdin.close()
        p.wait()

        # Read output
        #
        while True:
            line = p.stdout.readline()
            #line = p.stderr.readline()
            if not line: break
            self.log(console, line.replace('\n', ''))

        p.stdout.close()
        #p.stderr.close()
        p.wait()
        self.log(console, 'return code: ' + str(p.returncode))
        if p.returncode != 0:
            raise ValueError('Error running FASTTREE, return code: ' +
                             str(p.returncode) + '\n\n' + '\n'.join(console))

        # Check that FASTTREE produced output
        #
        if not os.path.isfile(output_newick_file_path):
            raise ValueError("failed to create FASTTREE output: " +
                             output_newick_file_path)
        elif not os.path.getsize(output_newick_file_path) > 0:
            raise ValueError("created empty file for FASTTREE output: " +
                             output_newick_file_path)

        # load the method provenance from the context object
        #
        self.log(console, "SETTING PROVENANCE")  # DEBUG
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects'] = []
        provenance[0]['input_ws_objects'].append(params['input_ref'])
        if 'intree_ref' in params and params['intree_ref'] != None and params[
                'intree_ref'] != '':
            provenance[0]['input_ws_objects'].append(params['intree_ref'])
        provenance[0]['service'] = 'kb_fasttree'
        provenance[0]['method'] = 'run_FastTree'

        # Upload results
        #
        if len(invalid_msgs) == 0:
            self.log(console, "UPLOADING RESULTS")  # DEBUG

            tree_name = params['output_name']
            tree_description = params['desc']
            tree_type = 'GeneTree'
            if 'species_tree_flag' in params and params[
                    'species_tree_flag'] != None and params[
                        'species_tree_flag'] != 0:
                tree_type = 'SpeciesTree'

            with open(output_newick_file_path, 'r',
                      0) as output_newick_file_handle:
                output_newick_buf = output_newick_file_handle.read()
            output_newick_buf = output_newick_buf.rstrip()
            if not output_newick_buf.endswith(';'):
                output_newick_buf += ';'
            self.log(console, "\nNEWICK:\n" + output_newick_buf + "\n")

            # Extract info from MSA
            #
            tree_attributes = None
            default_node_labels = None
            ws_refs = None
            kb_refs = None
            leaf_list = None
            if default_row_labels:
                default_node_labels = dict()
                leaf_list = []
                for row_id in default_row_labels.keys():
                    new_row_id = new_ids[row_id]
                    #default_node_labels[row_id] = default_row_labels[row_id]
                    default_node_labels[new_row_id] = default_row_labels[
                        row_id]
                    leaf_list.append(new_row_id)

            if 'ws_refs' in MSA_in.keys() and MSA_in['ws_refs'] != None:
                ws_refs = MSA_in['ws_refs']
            if 'kb_refs' in MSA_in.keys() and MSA_in['kb_refs'] != None:
                kb_refs = MSA_in['kb_refs']

            # Build output_Tree structure
            #
            output_Tree = {
                'name': tree_name,
                'description': tree_description,
                'type': tree_type,
                'tree': output_newick_buf
            }
            if tree_attributes != None:
                output_Tree['tree_attributes'] = tree_attributes
            if default_node_labels != None:
                output_Tree['default_node_labels'] = default_node_labels
            if ws_refs != None:
                output_Tree['ws_refs'] = ws_refs
            if kb_refs != None:
                output_Tree['kb_refs'] = kb_refs
            if leaf_list != None:
                output_Tree['leaf_list'] = leaf_list

            # Store output_Tree
            #
            try:
                new_obj_info = ws.save_objects({
                    'workspace':
                    params['workspace_name'],
                    'objects': [{
                        'type': 'KBaseTrees.Tree',
                        'data': output_Tree,
                        'name': params['output_name'],
                        'meta': {},
                        'provenance': provenance
                    }]
                })[0]
            except Exception as e:
                raise ValueError('Unable to save tree ' +
                                 params['output_name'] +
                                 ' object to workspace ' +
                                 str(params['workspace_name']) + ': ' + str(e))
                #to get the full stack trace: traceback.format_exc()

        # If input data is invalid
        #
        self.log(console, "BUILDING REPORT")  # DEBUG

        if len(invalid_msgs) != 0:
            reportName = 'fasttree_report_' + str(uuid.uuid4())
            report += "FAILURE\n\n" + "\n".join(invalid_msgs) + "\n"
            reportObj = {'objects_created': [], 'text_message': report}
            report_obj_info = ws.save_objects({
                #'id':info[6],
                'workspace':
                params['workspace_name'],
                'objects': [{
                    'type': 'KBaseReport.Report',
                    'data': reportObj,
                    'name': reportName,
                    'meta': {},
                    'hidden': 1,
                    'provenance': provenance
                }]
            })[0]
            returnVal = {
                'report_name':
                reportName,
                'report_ref':
                str(report_obj_info[6]) + '/' + str(report_obj_info[0]) + '/' +
                str(report_obj_info[4]),
            }
            return [returnVal]

        # Upload newick and newick labels
        #
        newick_labels_file = params['output_name'] + '-labels.newick'
        output_newick_labels_file_path = os.path.join(output_dir,
                                                      newick_labels_file)
        mod_newick_buf = output_newick_buf
        for row_id in new_ids:
            new_id = new_ids[row_id]
            label = default_node_labels[new_id]
            label = re.sub('\s', '_', label)
            label = re.sub('\/', '%' + '/'.encode("hex"), label)
            label = re.sub(r'\\', '%' + '\\'.encode("hex"), label)
            label = re.sub('\(', '%' + '('.encode("hex"), label)
            label = re.sub('\)', '%' + ')'.encode("hex"), label)
            label = re.sub('\[', '%' + '['.encode("hex"), label)
            label = re.sub('\]', '%' + ']'.encode("hex"), label)
            label = re.sub('\:', '%' + ':'.encode("hex"), label)
            label = re.sub('\;', '%' + ';'.encode("hex"), label)
            label = re.sub('\|', '%' + ';'.encode("hex"), label)
            mod_newick_buf = re.sub('\(' + new_id + '\:', '(' + label + ':',
                                    mod_newick_buf)
            mod_newick_buf = re.sub('\,' + new_id + '\:', ',' + label + ':',
                                    mod_newick_buf)

            #self.log(console, "new_id: '"+new_id+"' label: '"+label+"'")  # DEBUG

        mod_newick_buf = re.sub('_', ' ', mod_newick_buf)
        with open(output_newick_labels_file_path, 'w',
                  0) as output_newick_labels_file_handle:
            output_newick_labels_file_handle.write(mod_newick_buf)

        # upload
        try:
            newick_upload_ret = dfu.file_to_shock({
                'file_path': output_newick_file_path,
                #'pack': 'zip'})
                'make_handle': 0
            })
        except:
            raise ValueError('error uploading newick file to shock')
        try:
            newick_labels_upload_ret = dfu.file_to_shock({
                'file_path': output_newick_labels_file_path,
                #'pack': 'zip'})
                'make_handle': 0
            })
        except:
            raise ValueError('error uploading newick labels file to shock')

        # Create html with tree image
        #
        timestamp = int(
            (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
            * 1000)
        html_output_dir = os.path.join(self.scratch,
                                       'output_html.' + str(timestamp))
        if not os.path.exists(html_output_dir):
            os.makedirs(html_output_dir)
        html_file = params['output_name'] + '.html'
        png_file = params['output_name'] + '.png'
        pdf_file = params['output_name'] + '.pdf'
        output_html_file_path = os.path.join(html_output_dir, html_file)
        output_png_file_path = os.path.join(html_output_dir, png_file)
        output_pdf_file_path = os.path.join(output_dir, pdf_file)

        # init ETE3 objects
        t = ete3.Tree(mod_newick_buf)
        ts = ete3.TreeStyle()

        # customize
        ts.show_leaf_name = True
        ts.show_branch_length = False
        ts.show_branch_support = True
        #ts.scale = 50 # 50 pixels per branch length unit
        ts.branch_vertical_margin = 5  # pixels between adjacent branches
        ts.title.add_face(ete3.TextFace(params['output_name'] + ": " +
                                        params['desc'],
                                        fsize=10),
                          column=0)

        node_style = ete3.NodeStyle()
        node_style["fgcolor"] = "#606060"  # for node balls
        node_style["size"] = 10  # for node balls (gets reset based on support)
        node_style["vt_line_color"] = "#606060"
        node_style["hz_line_color"] = "#606060"
        node_style["vt_line_width"] = 2
        node_style["hz_line_width"] = 2
        node_style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        node_style["hz_line_type"] = 0

        leaf_style = ete3.NodeStyle()
        leaf_style["fgcolor"] = "#ffffff"  # for node balls
        leaf_style["size"] = 2  # for node balls (we're using it to add space)
        leaf_style["vt_line_color"] = "#606060"  # unecessary
        leaf_style["hz_line_color"] = "#606060"
        leaf_style["vt_line_width"] = 2
        leaf_style["hz_line_width"] = 2
        leaf_style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        leaf_style["hz_line_type"] = 0

        for n in t.traverse():
            if n.is_leaf():
                style = leaf_style
            else:
                style = ete3.NodeStyle()
                for k in node_style.keys():
                    style[k] = node_style[k]

                if n.support > 0.95:
                    style["size"] = 6
                elif n.support > 0.90:
                    style["size"] = 5
                elif n.support > 0.80:
                    style["size"] = 4
                else:
                    style["size"] = 2

            n.set_style(style)

        # save images
        dpi = 300
        img_units = "in"
        img_pix_width = 1200
        img_in_width = round(float(img_pix_width) / float(dpi), 1)
        img_html_width = img_pix_width // 2
        t.render(output_png_file_path,
                 w=img_in_width,
                 units=img_units,
                 dpi=dpi,
                 tree_style=ts)
        t.render(output_pdf_file_path,
                 w=img_in_width,
                 units=img_units,
                 tree_style=ts)  # dpi irrelevant

        # make html
        html_report_lines = []
        html_report_lines += ['<html>']
        html_report_lines += [
            '<head><title>KBase FastTree-2: ' + params['output_name'] +
            '</title></head>'
        ]
        html_report_lines += ['<body bgcolor="white">']
        html_report_lines += [
            '<img width=' + str(img_html_width) + ' src="' + png_file + '">'
        ]
        html_report_lines += ['</body>']
        html_report_lines += ['</html>']

        html_report_str = "\n".join(html_report_lines)
        with open(output_html_file_path, 'w', 0) as html_handle:
            html_handle.write(html_report_str)

        # upload images and html
        try:
            png_upload_ret = dfu.file_to_shock({
                'file_path': output_png_file_path,
                #'pack': 'zip'})
                'make_handle': 0
            })
        except:
            raise ValueError('error uploading png file to shock')
        try:
            pdf_upload_ret = dfu.file_to_shock({
                'file_path': output_pdf_file_path,
                #'pack': 'zip'})
                'make_handle': 0
            })
        except:
            raise ValueError('error uploading pdf file to shock')
        try:
            html_upload_ret = dfu.file_to_shock({
                'file_path': html_output_dir,
                'make_handle': 0,
                'pack': 'zip'
            })
        except:
            raise ValueError('error uploading png file to shock')

        # Create report obj
        #
        reportName = 'blast_report_' + str(uuid.uuid4())
        #report += output_newick_buf+"\n"
        reportObj = {
            'objects_created': [],
            #'text_message': '',  # or is it 'message'?
            'message': '',  # or is it 'text_message'?
            'direct_html': '',
            'direct_html_link_index': None,
            'file_links': [],
            'html_links': [],
            'workspace_name': params['workspace_name'],
            'report_object_name': reportName
        }
        reportObj['objects_created'].append({
            'ref':
            str(params['workspace_name']) + '/' + str(params['output_name']),
            'description':
            params['output_name'] + ' Tree'
        })
        reportObj['direct_html_link_index'] = 0
        reportObj['html_links'] = [{
            'shock_id': html_upload_ret['shock_id'],
            'name': html_file,
            'label': params['output_name'] + ' HTML'
        }]
        reportObj['file_links'] = [{
            'shock_id': newick_upload_ret['shock_id'],
            'name': params['output_name'] + '.newick',
            'label': params['output_name'] + ' NEWICK'
        }, {
            'shock_id':
            newick_labels_upload_ret['shock_id'],
            'name':
            params['output_name'] + '-labels.newick',
            'label':
            params['output_name'] + ' NEWICK (with labels)'
        }, {
            'shock_id': png_upload_ret['shock_id'],
            'name': params['output_name'] + '.png',
            'label': params['output_name'] + ' PNG'
        }, {
            'shock_id': pdf_upload_ret['shock_id'],
            'name': params['output_name'] + '.pdf',
            'label': params['output_name'] + ' PDF'
        }]

        SERVICE_VER = 'release'
        reportClient = KBaseReport(self.callbackURL,
                                   token=ctx['token'],
                                   service_ver=SERVICE_VER)
        report_info = reportClient.create_extended_report(reportObj)

        # Done
        #
        self.log(console, "BUILDING RETURN OBJECT")
        returnVal = {
            'report_name':
            report_info['name'],
            'report_ref':
            report_info['ref'],
            'output_ref':
            str(new_obj_info[6]) + '/' + str(new_obj_info[0]) + '/' +
            str(new_obj_info[4])
        }

        self.log(console, "run_FastTree DONE")
        #END run_FastTree

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method run_FastTree return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
예제 #14
0
    def ete3_pdf_tree_output(self):
        """Render the cluster-coloured tree to ``pdftree_<outfname>.pdf``.

        The tree is parsed from ``self.ori_tree_string`` and ladderized.
        Leaves found in ``self.taxon_to_clusterid`` get their horizontal
        branch and a bold name label drawn in their cluster's colour; all
        other leaves get an italic label in the default colour.  A
        'Cluster-ID' heatmap column is added through
        ``self.generate_heatmap``.

        Reads: ``self.ori_tree_string``, ``self.clusterid_to_taxa``,
        ``self.taxon_to_clusterid``, ``self.outfname``.
        Returns: None (the PDF is written as a side effect).
        """
        output_tree = ete3.Tree(self.ori_tree_string)
        output_tree.ladderize()

        # treestyle
        ts = ete3.TreeStyle()
        ts.show_leaf_name = False

        # Colour scheme per cluster id.  Materialise the keys as a list:
        # on Python 3 ``dict.keys()`` is a view and cannot be indexed, which
        # made the former ``.keys()[0]`` lookup fail.
        cluster_ids = list(self.clusterid_to_taxa)
        if len(cluster_ids) > 1:
            clusterid_to_color = self.generate_color_scheme(cluster_ids)
        else:
            # Zero or one cluster: a single fixed red (empty map stays empty
            # instead of raising).
            clusterid_to_color = {cid: '#ff2929' for cid in cluster_ids}

        # Scale bar length: a tenth of the root-to-farthest-leaf distance,
        # rounded to 3 decimals.  Best effort -- the default is kept if the
        # value cannot be computed or assigned.
        try:
            ts.scale_length = float('{:.3f}'.format(
                output_tree.get_farthest_leaf()[-1] / 10))
        except Exception:
            pass

        for node in output_tree.traverse(strategy='levelorder'):
            ns = ete3.NodeStyle()
            ns["size"] = 0  # no node shape

            if not node.is_leaf():
                node.set_style(ns)
                continue

            taxon = node.name
            if taxon in self.taxon_to_clusterid:
                # clustered leaf: colour both the branch and the label
                color = clusterid_to_color[self.taxon_to_clusterid[taxon]]
                ns["hz_line_color"] = color
                taxon_name = ete3.TextFace(taxon,
                                           ftype='Arial',
                                           fsize=2,
                                           bold=True,
                                           fgcolor=color)
            else:
                # unclustered leaf: plain italic label
                taxon_name = ete3.TextFace(taxon,
                                           ftype='Arial',
                                           fsize=2,
                                           fstyle="italic")

            node.set_style(ns)
            # label is drawn to the right of the branch
            taxon_name.margin_left = 2
            node.add_face(taxon_name, column=0, position='branch-right')

        heatmap_headers = ['Cluster-ID']
        output_tree = self.generate_heatmap(output_tree,
                                            self.taxon_to_clusterid,
                                            clusterid_to_color)

        # heatmap header
        for lh_index, legend_header in enumerate(heatmap_headers):
            header_face = ete3.TextFace(legend_header, ftype='Arial', fsize=2)
            header_face.hz_align = 1
            header_face.vt_align = 1
            header_face.margin_left = 5
            header_face.margin_right = 5
            ts.aligned_header.add_face(header_face, lh_index)

        # render as pdf
        output_tree.render('pdftree_{}.pdf'.format(self.outfname),
                           tree_style=ts)
def evolve_tree(newick,
                dup_mut=0.00043,
                del_mut=0.0,
                dup_rev=0.00086,
                del_rev=0.0):
    """Simulate a two-state CNV character evolving down a copy of a tree.

    The input is deep-copied, so the caller's tree is not modified.  The
    root is labelled ``'reference'``; every other node starts from its
    parent's state and then, for each generation along its branch, may flip
    ``'reference' -> 'mutation'`` with probability ``dup_mut`` or
    ``'mutation' -> 'reference'`` with probability ``dup_rev``.  The
    left-over fraction of a generation is applied once at the end with
    probability ``1 - (1 - p) ** fraction``.

    Args:
        newick: Despite the name this is a tree *object*, not a string: it
            must be deep-copyable and provide ``traverse()`` (parents are
            expected before their children), and its nodes must provide
            ``is_root()``, ``add_feature()``, ``up`` and ``dist``.
        dup_mut: Per-generation probability of reference -> mutation.
        del_mut: Accepted for interface compatibility; currently unused.
        dup_rev: Per-generation probability of mutation -> reference.
        del_rev: Accepted for interface compatibility; currently unused.

    Returns:
        The copied tree, each node carrying a ``cnv_state`` feature of
        either ``'reference'`` or ``'mutation'``.
    """
    t = copy.deepcopy(newick)

    # Generations per unit of branch length (one SNP): .0076 is SNPs per
    # year, 25 is years per generation.
    gen_per_snp = 1 / (25 * .0076)

    for node in t.traverse():
        if node.is_root():
            node.add_feature('cnv_state', 'reference')
            continue
        node.add_feature('cnv_state', node.up.cnv_state)

        # Generational mutation including fractional generations; works for
        # the multi-parameter (forward + reverse rate) model.
        elapsed = node.dist * gen_per_snp
        gens = int(elapsed)
        frac_gen = elapsed - gens  # the fractional remainder of a generation

        # One random draw per whole generation.  ``range`` (not the
        # Python-2-only ``xrange``) keeps this runnable on Python 3.
        for _ in range(gens):
            rand_draw = random.random()
            if node.cnv_state == 'reference':
                if rand_draw < dup_mut:
                    node.cnv_state = 'mutation'
            elif rand_draw < dup_rev:
                node.cnv_state = 'reference'

        # One final draw for the fractional generation.
        rand_draw = random.random()
        if node.cnv_state == 'reference':
            if rand_draw >= (1 - dup_mut)**frac_gen:
                node.cnv_state = 'mutation'
        elif rand_draw >= (1 - dup_rev)**frac_gen:
            node.cnv_state = 'reference'

        # NOTE: visual highlighting of changed nodes (a red node style and a
        # text face on mutated leaves) was already disabled in the original
        # code and is intentionally not performed here.
    return t
예제 #16
0
def tree_draw(tree_file,
              tree_name=None,
              order_vector_file=None,
              cell_colors_file=None,
              clustering_colors_file=None,
              clustering_sizes_file=None,
              intermediate_node_sizes_file=None,
              intermediate_node_labels_file=None,
              leaf_labels_file=None,
              legend_file=None,
              duplicate_file=None,
              tree_scale='linear',
              tree_rotation=True,
              font_size=7,
              font_legend=7,
              node_size=3,
              scale_rate=None,
              distance_factor=1,
              y_scale=True):
    """Load a newick tree and build a styled ``(tree, tree_style)`` pair.

    Nothing is rendered here; the caller renders the returned objects.

    Args:
        tree_file: Passed to ``ete3.Tree(newick=..., format=1)``.
        tree_name: Optional title text (font size 20).
        order_vector_file: Optional input for ``utils.get_leaf_order``; the
            result is indexed by leaf name and used to swap the two children
            of internal nodes.
        cell_colors_file: Optional input for ``utils.get_cells_colors``
            (node name -> colour, applied as the node's ``fgcolor``).
        clustering_colors_file: Optional, forwarded to ``color_clustering``.
        clustering_sizes_file: Optional, forwarded to ``size_clustering``.
        intermediate_node_sizes_file: Optional input for
            ``utils.get_bootsrtap_size`` (node name -> node size).
        intermediate_node_labels_file: Accepted but currently unused.
        leaf_labels_file: Optional input for ``utils.get_cells_labels``
            (node name -> label); when given, the default leaf names are
            hidden and the labels are added as aligned text faces.
        legend_file: Optional input for ``utils.get_legend``
            (text -> colour), drawn as coloured circles plus text.
        duplicate_file: Optional input for ``utils.get_dup_labels``
            (node name -> colour); matching nodes get a '*' face.
        tree_scale: ``'linear'`` keeps branch lengths, ``'log'`` rescales
            them with ``log10``; any other value leaves lengths untouched.
        tree_rotation: When true the tree style is rotated by 90 degrees.
        font_size: Font size of the leaf-label faces.
        font_legend: Font size of the legend text.
        node_size: Node size used for leaves.
        scale_rate: Multiplier applied to every (re)computed branch length;
            falls back to ``ts.scale`` (set to 1 here) when falsy.
        distance_factor: Multiplier applied to the root distance inside the
            ``log10`` transform.
        y_scale: When true, the y axis is switched on and configured.

    Returns:
        Tuple ``(t, ts)``: the modified tree and its tree style.

    Raises:
        AssertionError: if a label name does not match exactly one node.
    """
    t = ete3.Tree(newick=tree_file, format=1)
    ts = ete3.TreeStyle()
    if tree_rotation:
        ts.rotation = 90
    ts.show_leaf_name = True
    ts.show_scale = False
    ts.scale = 1
    if tree_name:
        ts.title.add_face(ete3.TextFace(tree_name, fsize=20), column=0)

    # Per-node bookkeeping, keyed by node *name*.
    # NOTE(review): nodes sharing a name (e.g. unnamed internal nodes) share
    # one entry -- presumably all names are unique in the inputs; confirm.
    styles = {}
    for n in t.traverse():
        base_style = ete3.NodeStyle()
        base_style['fgcolor'] = 'black'
        base_style["vt_line_width"] = 2
        base_style["hz_line_width"] = 1
        styles[n.name] = {'style': base_style}

    # NOTE(review): ``ts.y_axis`` does not appear to be part of stock ete3
    # TreeStyle -- presumably a patched ete3 build is in use; confirm.
    root = t.get_tree_root()
    last_leaf = root.get_farthest_leaf()
    ts.y_axis['scale_min_value'] = root.dist
    ts.y_axis['scale_max_value'] = last_leaf[1]

    # Compute the displayed branch length of every node.
    for n in t.traverse():
        if tree_scale == 'log':
            if n == root:
                styles[n.name]['dist'] = 0
            else:
                # Length = log10 of the distance to the root minus what the
                # ancestors already consumed, clamped at zero.
                father_path = sum(styles[ancestor.name]['dist']
                                  for ancestor in n.get_ancestors())
                dist = math.log10(n.get_distance(root) * distance_factor +
                                  1) - father_path
                styles[n.name]['dist'] = max(dist, 0)
        elif tree_scale == 'linear':
            styles[n.name]['dist'] = n.dist

    # Apply the branch lengths and the default node sizes.
    if not scale_rate:
        scale_rate = ts.scale
    for n in t.traverse():
        entry = styles[n.name]
        if 'dist' in entry:
            n.dist = entry['dist'] * scale_rate
        # only leaves are drawn with a visible node
        entry['style']["size"] = node_size if n.is_leaf() else 0

    # add bootstrap values to the branches (size of the node)
    if intermediate_node_sizes_file:
        bootstrap_sizes = utils.get_bootsrtap_size(
            intermediate_node_sizes_file)
        for branch, size in bootstrap_sizes.items():
            styles[branch]['style']["size"] = size
            styles[branch]['style']['fgcolor'] = 'black'

    # add colors to the leaves
    # Always bound, so the label section below can consult it even when no
    # colour file was supplied (previously a NameError in that case).
    cells_colors = {}
    if cell_colors_file:
        cells_colors = utils.get_cells_colors(cell_colors_file)
        for name, color in cells_colors.items():
            styles[name]['style']['fgcolor'] = color

    # reorder the tree according to the supplied leaf order, if any
    if order_vector_file:
        leaf_order = utils.get_leaf_order(order_vector_file)
        for n in t.traverse('postorder'):
            descendants = n.get_descendants(strategy='postorder')
            if not descendants:
                continue
            # first leaf in postorder vs. last node in preorder
            first_leaf = next(d for d in descendants if d.is_leaf())
            last_node = n.get_descendants(strategy='preorder')[-1]
            if (last_node.is_leaf()
                    and leaf_order[first_leaf.name] > leaf_order[last_node.name]):
                # assumes exactly two children (binary tree); unpacking
                # raises ValueError otherwise
                left, right = n.children
                n.children = [right, left]

    # add width to branches
    if clustering_sizes_file:
        t, styles = size_clustering(t, styles, clustering_sizes_file)

    # add colors to branches
    if clustering_colors_file:
        t, ts, styles = color_clustering(t, ts, styles, clustering_colors_file)

    # add new leaf labels
    if leaf_labels_file:
        cells_labels = utils.get_cells_labels(leaf_labels_file)
        ts.show_leaf_name = False
        for name, label in cells_labels.items():
            nodes = t.search_nodes(name=name)
            assert len(nodes) == 1, nodes
            node = nodes[0]
            if name in cells_colors:
                name_face = ete3.faces.TextFace(label,
                                                fsize=font_size,
                                                fgcolor=cells_colors[name])
            else:
                name_face = ete3.faces.TextFace(label, fsize=font_size)

            name_face.margin_left = 3
            node.add_face(name_face, 0, "aligned")

    # add duplicate tags to nodes
    if duplicate_file:
        dup_labels = utils.get_dup_labels(duplicate_file)
        for name, color in dup_labels.items():
            node = node_check(name, t)
            if not node:
                continue
            dup_face = ete3.faces.TextFace('*', fsize=10, fgcolor=color)
            dup_face.margin_left = 5
            node.add_face(dup_face, column=1)

    # add y-scale to the picture
    if y_scale:
        ts.y_axis['show'] = True
        ts.y_axis['scale_type'] = tree_scale
        ts.y_axis['scale_length'] = int(root.get_farthest_leaf()[1] -
                                        root.dist + 10)

    # add legend to the tree: a coloured circle followed by its text
    if legend_file:
        legend = utils.get_legend(legend_file)
        for mark, color in legend.items():
            ts.legend.add_face(ete3.faces.CircleFace(2, color), column=0)
            legend_txt = ete3.faces.TextFace(mark, fsize=font_legend)
            legend_txt.margin_left = 5
            ts.legend.add_face(legend_txt, column=1)
        ts.legend_position = 4

    # set all the styles
    for n in t.traverse():
        n.set_style(styles[n.name]['style'])

    return t, ts
def _square_face(color):
    """Return a 30x30 square face filled with *color* (presence marker)."""
    face = e.RectFace(30, 30, color, color)
    face.border.margin = 5
    face.margin_right = 5
    face.margin_left = 5
    face.vt_align = 1
    # ete3 faces expose ``hz_align``; the former ``ht_align`` only created an
    # unused attribute, so the square was never centred horizontally.
    face.hz_align = 1
    return face


def Main():
    """Build the annotated "box" tree and render it to ``outputFile``.

    Reads the module-level ``metadataPath``, ``distancePath``, ``treePath``
    and ``outputFile``.  The tree is re-rooted on the leaf named
    "Reference"; that leaf receives the column headers, while every other
    leaf receives, in aligned columns: the sensitive metadata values, the
    sample ID, a green square when the sample is flagged new, one black
    square per plasmid "inc" replicon type present in the sample, the MLST
    species, sequence type, carbapenem resistance genes, and its row of the
    distance matrix.

    Returns: None (the image is written by ``t.render``).
    """
    sensitive_meta_data = SensitiveMetadata()
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)
    treeFile = "".join(read(treePath))  # read once; was read twice before

    # Distance matrix stored as row name -> list of cell strings.
    distanceDict = {}
    for line in distance:
        cells = line.split("\t")
        distanceDict[cells[0]] = cells[1:]
    # The first row's cells double as the distance-column headers and fix
    # the number of distance columns ([] when the matrix is empty).
    distance_header = next(iter(distanceDict.values()), [])

    # Tree construction, rooted on the reference leaf.
    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")
    reference = t & "Reference"

    # set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # Find the plasmid origins: replicon type containing "inc" -> list of
    # sample IDs carrying it (each ID listed once).
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" in inc.lower():
                    carriers = plasmidIncs.setdefault(inc, [])
                    if sample.ID not in carriers:
                        carriers.append(sample.ID)
    # Fixed column order for the replicon types.
    inc_names = list(plasmidIncs)

    for n in t.traverse():  # loop through the nodes of the tree
        if not n.is_leaf():
            continue
        index = 0

        if n.name == "Reference":
            # reference branch: populate the faces with column headers
            for sensitive_data_column in sensitive_meta_data.get_columns():
                reference.add_face(addFace(sensitive_data_column), index,
                                   "aligned")
                index += 1
            for title in ("SampleID", "New?"):
                reference.add_face(addFace(title), index, "aligned")
                index += 1
            for offset, inc in enumerate(inc_names):
                reference.add_face(addFace(inc), index + offset, "aligned")
            index += len(inc_names)
            for title in ("MLSTScheme", "Sequence Type", "Carbapenamases"):
                reference.add_face(addFace(title), index, "aligned")
                index += 1
            for offset, cell in enumerate(distance_header):
                reference.add_face(addFace(cell), index + offset, "aligned")
            continue

        # sample branch: populate with metadata
        sample_id = n.name.replace(".fa", "")
        mData = metadata[sample_id]

        # sensitive data columns
        for sensitive_data_column in sensitive_meta_data.get_columns():
            sens_col_val = sensitive_meta_data.get_value(
                bcid=mData.ID, column_name=sensitive_data_column)
            n.add_face(addFace(sens_col_val), index, "aligned")
            index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        # "New?" column; the column is reserved even when left empty.
        # NOTE(review): explicit ``== True`` kept because the type of
        # ``new`` is not visible here -- confirm before simplifying.
        if mData.new == True:
            n.add_face(_square_face("green"), index, "aligned")
        index += 1

        # presence/absence of each replicon type
        for offset, inc in enumerate(inc_names):
            if sample_id in plasmidIncs[inc]:
                n.add_face(_square_face("black"), index + offset, "aligned")
        index += len(inc_names)

        n.add_face(addFace(mData.MLSTSpecies), index, "aligned")
        index += 1
        n.add_face(addFace(mData.SequenceType), index, "aligned")
        index += 1
        n.add_face(addFace(mData.CarbapenemResistanceGenes), index,
                   "aligned")
        index += 1

        # distance matrix row of this sample (looked up by the raw leaf name)
        if distance_header:
            row = distanceDict[n.name]
            for offset in range(len(distance_header)):
                n.add_face(addFace(row[offset]), index + offset, "aligned")

    # save the image; the output format follows the file name
    t.render(outputFile, w=5000, units="mm", tree_style=ts)