def learnModel(trees, stree, gene2species, statsprefix="", filenames=None):
    """Estimate branch-length parameters from a collection of gene trees.

    Branch lengths are gathered with ``phylo.find_branch_distrib``; for every
    node whose lengths are non-empty and not all equal, the lengths are
    divided element-wise by the per-tree totals (``util.vdiv``) and fitted
    with ``fitNormal2``.

    Parameters:
        trees: gene trees to learn from.
        stree: species tree; its root name receives the fixed entry [0, 1].
        gene2species: mapping handed through to ``phylo.find_branch_distrib``.
        statsprefix: when non-empty, ``<statsprefix>.lens`` and
            ``<statsprefix>_rates.tab`` are written.
        filenames: forwarded to ``treeDistrib2table``.

    Returns:
        dict keyed by node name holding the fitted parameters, plus a
        ``"baserate"`` entry ``[mu*mu/sigma2, mu/sigma2]`` computed from the
        total lengths of the trees flagged in ``used``.
        NOTE(review): the baserate pair looks like a method-of-moments
        gamma (shape, rate) estimate -- confirm against the consumer.
    """
    util.tic("learn model")

    util.tic("find branch length distributions")
    lengths, used = phylo.find_branch_distrib(trees, stree, gene2species,
                                              False)
    debug("Total trees matching species topology: %d out of %d" %
          (sum(used), len(trees)))
    util.toc()

    params = {}
    # Built as a real list because it is reused for every node below.
    totlens = [sum(column) for column in zip(*lengths.values())]

    # print output stats
    if statsprefix != "":
        # Context manager so the handle is closed even if writing fails.
        with open(statsprefix + ".lens", "w") as out:
            writeTreeDistrib(out, lengths)
        rates = treeDistrib2table(lengths, filenames=filenames)
        rates.write(statsprefix + "_rates.tab")

    util.tic("fitting params")
    for node, lens in lengths.items():
        # Nothing to fit for empty or constant length lists.
        if len(lens) == 0 or max(lens) == min(lens):
            continue
        util.tic("fitting params for " + str(node.name))
        params[node.name] = fitNormal2(util.vdiv(lens, totlens))
        util.toc()
    util.toc()

    # calc distribution of total tree length
    trees2 = util.mget(trees, util.findeq(True, used))
    lens = [sum(y.dist for y in x.nodes.values()) for x in trees2]
    # Two-pass trimming: drop totals >= 20, then anything >= twice the mean.
    lens = [x for x in lens if x < 20]
    mu = stats.mean(lens)
    lens = [x for x in lens if x < 2*mu]
    mu = stats.mean(lens)
    sigma2 = stats.variance(lens)

    params["baserate"] = [mu*mu/sigma2, mu/sigma2]
    params[stree.root.name] = [0, 1]

    util.toc()
    return params
def fitNormal2(lens):
    """Fit ``stats.normalPdf`` to *lens* and return the fitted parameters.

    The fit is seeded with the sample mean and standard deviation and is
    run by ``stats.fitDistrib`` over the interval ``mu +/- 2*sigma``.

    Parameters:
        lens: sequence of numeric samples.

    Returns:
        The parameter value returned by ``stats.fitDistrib``; the residual
        it also returns is discarded.
    """
    mu = stats.mean(lens)
    sigma = stats.sdev(lens)

    # The divisor was ``min(30, len(lens)/5)``, which is 0 under integer
    # division for fewer than five samples and raised ZeroDivisionError.
    # Clamp it to at least 1; results for five or more samples are unchanged.
    # NOTE(review): the last argument is presumably the bin/step width used
    # by fitDistrib -- confirm.
    divisor = max(1, min(30, len(lens) // 5))

    param, resid = stats.fitDistrib(stats.normalPdf, [mu, sigma], lens,
                                    mu - 2*sigma, mu + 2*sigma,
                                    sigma / divisor)
    return param
def layout_arg(arg, leaves=None, yfunc=lambda x: x):
    """Compute [x, y] coordinates for every node of an ARG.

    Leaves take their x from ``layout_arg_leaves(arg)`` or, when *leaves* is
    supplied, from ``util.list2lookup(leaves)``.  Every other node takes the
    mean x of its children.  The y coordinate is always ``yfunc(node.age)``.

    Returns:
        dict mapping each node to a two-element list ``[x, y]``.
    """
    if leaves is None:
        leaf_columns = layout_arg_leaves(arg)
    else:
        leaf_columns = util.list2lookup(leaves)

    coords = {}
    # Postorder guarantees children are placed before their parent.
    for item in arg.postorder():
        if item.is_leaf():
            xpos = leaf_columns[item]
        else:
            xpos = stats.mean(coords[kid][0] for kid in item.children)
        coords[item] = [xpos, yfunc(item.age)]

    return coords
def draw_tree(tree, brecon, stree, xscale=100, yscale=100, leaf_padding=10,
              label_size=None, label_offset=None, font_size=12,
              stree_font_size=20, canvas=None, autoclose=True, rmargin=10,
              lmargin=100, tmargin=100, bmargin=100, tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0), stree_color=(.3, .7, .3),
              snode_color=(.2, .2, .7), loss_color=(1, 1, 1),
              loss_color_border=(.5, .5, .5), dup_color=(0, 0, 1),
              dup_color_border=(0, 0, 1), trans_color=(1, 1, 0),
              trans_color_border=(.5, .5, 0), gtrans_color=(1, 0, 0),
              gtrans_color_border=(.5, 0, 0), event_size=10, snames=None,
              rootlen=None, stree_width=.8, filename="tree.svg"):
    '''Draw a reconciliation of a parasite tree on a host tree as SVG.

    Parameters:
        tree: the parasite tree being drawn.
        brecon: mapping from each node of `tree` to a sequence of
            (snode, event, frequency) triples.  The event strings tested
            here are "spec", "gene", "loss", "dup", "trans" and "gtrans".
            NOTE: this mapping is modified in place (brecon[node] is
            replaced by the result of orderLoss).
        stree: the host tree, laid out with treelib1.layout_tree.
        xscale, yscale: scale factors handed to the host-tree layout.
        canvas: an existing SVG canvas; when None a new svg.Svg is opened
            on `filename`.
        autoclose: when true, endStyle()/endSvg() are called before
            returning.
        *_color, *_color_border, event_size, font sizes, margins: drawing
            attributes for the tree, the host tree and the event boxes.
        snames: mapping from host leaf name to displayed label; defaults to
            the identity mapping over stree.leaf_names().
        rootlen: length of the stub drawn before the host root; defaults to
            a tenth of the largest x in the host layout.

    Returns:
        The canvas that was drawn on.

    NOTE(review): the source this block was recovered from had lost its
    line structure; nesting was reconstructed from the syntax and two
    placements are flagged below.
    '''

    # set defaults
    font_ratio = 8. / 11.

    # label_size is computed here but not read again in this function.
    if label_size is None:
        label_size = .7 * font_size

    # legend_scale and minlen are assigned but not read again below.
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout: add a pseudo-parent (key None) left of the root, then
    # shift every entry right by rootlen and up by half a y unit.
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    ylists = defaultdict(lambda: [])  # snode -> nodes stacked on it
    yorders = {}                      # node -> one y rank per event

    # layout speciations and genes (y)
    for node in tree.preorder():
        # Remember the name of the first preorder node (the root).  The
        # preorder list is rebuilt on every iteration just for this test.
        if node == list(tree.preorder())[0]:
            rootNode = node.name
        yorders[node] = []
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                yorders[node].append(len(ylists[snode]))
                ylists[snode].append(node)

    # layout dups and transfers (y)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # Find number of nodes on a single branch for y-coord:
                # average the ranks of the children whose last event sits
                # on the same host node.
                v = [
                    yorders[child] for child in node.children
                    if brecon[child][-1][0] == snode
                ]
                if len(v) == 0:
                    yorders[node].append(0)
                else:
                    yorders[node].append(stats.mean(flatten(v)))

    # layout node (x)
    xorders = { }  # records number of nodes on a single branch for x-coord
    branchFrac = {}  # records the placement of a node on a branch
    for node in tree.postorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen at host vertices
                if not node in branchFrac:
                    branchFrac[node] = 0
            else:
                # Transfers and duplications occur on branches
                v = [branchFrac[child] for child in node.children]
                if len(v) == 0:
                    branchFrac[node] = 1
                else:
                    branchFrac[node] = max(v) + 1

    for node in tree.preorder():
        xorders[node] = []
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen on vertices, not
                # branches
                xorders[node].append(0)
            else:
                if node.parent and containsTransOrDup(node.parent, brecon):
                    # set branchFrac to the branch Frac of the parent, they
                    # are on the same branch
                    branchFrac[node] = branchFrac[node.parent]
                if containsLoss(node, brecon):
                    # if following a loss, first transfer/duplication event
                    # on branch
                    xorders[node].append(1)
                elif not node.parent:
                    # Root of tree
                    xorders[node].append(0)
                else:
                    xorders[node].append(maxList(xorders[node.parent]) + 1)

    # setup layout: node -> list of (x, y), one entry per event.  The None
    # key stands for the parent of the tree root.
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree.preorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            nx, ny = slayout[snode]
            px, py = slayout[snode.parent]
            (npx, npy) = layout[node.parent][-1]

            # set spacing between nodes on the same branch: shrink the step
            # in units of 5 until branchFrac steps fit in the branch length
            frac = 50
            while branchFrac[node] * frac >= nx - px:
                frac = frac - 5

            # calc x
            if event == "trans" or event == "gtrans":
                if npx > px:
                    # transfer parent is farther forward in time than host
                    # parent
                    x = npx + frac
                else:
                    x = px + frac
            elif event == "dup":
                x = px + frac
            else:
                x = nx

            # calc y: interpolate along the host branch, then offset by the
            # event's rank within the branch width (frac is reused here)
            deltay = ny - py
            slope = deltay / float(nx - px)
            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2
            frac = (yorders[node][n] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout:
                layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]

        # NOTE(review): the two reorderings and the debug dump below are
        # placed at per-node level (after all events of the node are laid
        # out); confirm against the original file.
        # order brecon nodes temporally
        brecon[node] = orderLoss(node, brecon, layout)
        # order layout nodes temporally
        layout[node] = orderLayout(node, layout)

        # Debug output when a y coordinate falls well below the host layout.
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    max_label_size = max(len(x.name) for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size

    # The string literal below is disabled code kept from an earlier version.
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        # With the declared default autoclose=True these None checks only
        # fire when the caller passes autoclose=None explicitly.
        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout, yscale=yscale,
               stree_width=stree_width, stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name], x + leaf_padding + max_label_size,
                        y + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # draw tree: connect every event to the event that precedes it
    for node in tree:
        containsL = containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if containsL == False:
                # no loss event
                px, py = layout[node.parent][-1]
            else:
                # loss event present
                if n == 0:
                    # event is loss
                    px, py = layout[node.parent][-1]
                else:
                    # event stems from loss
                    px, py = layout[node][n - 1]

            trans = False
            if node.parent:
                snode, event, frequency = brecon[node][n]
                if n == 0:
                    psnode, pevent, pfrequency = brecon[node.parent][-1]
                # Event stemming from a loss event
                else:
                    psnode, pevent, pfrequency = brecon[node][n - 1]
                # A transfer edge is one whose preceding event is a transfer
                # located on a different host node.
                if pevent == "trans" or pevent == "gtrans":
                    if psnode != snode:
                        trans = True
                else:
                    trans = False

            if not trans:
                canvas.line(x, y, px, py, color=tree_color)
            # draw the transfer dashed line
            else:
                arch = 20
                # Both control points are the midpoint shifted left by arch.
                x2 = (x * .5 + px * .5) - arch
                y2 = (y * .5 + py * .5)
                x3 = (x * .5 + px * .5) - arch
                y3 = (y * .5 + py * .5)
                # draw regular transfer dashed line
                if pevent == "trans":
                    canvas.write(
                        "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                        (x, y, x2, y2, x3, y3, px, py,
                         " style='stroke-dasharray: 4, 2' " +
                         svg.colorFields(tree_trans_color, (0, 0, 0, 0))))
                # draw guilty transfer dashed line
                else:
                    canvas.write(
                        "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                        (x, y, x2, y2, x3, y3, px, py,
                         " style='stroke-dasharray: 4, 2' " +
                         svg.colorFields(gtrans_color, (0, 0, 0, 0))))

    # draw events
    for node in tree:
        # Arrow-shaped marker and caption pointing at the root's first event.
        if node.name == rootNode:
            x, y = layout[node][0]
            canvas.polygon((x-20, y, x-50, y+30,x-50, y+15, x-90, y+15, x-90,\
                            y-15, x-50, y-15, x-50, y-30), strokeColor = (1,.7,.3), \
                           fillColor = (1,.7,.3))
            canvas.text("Root Node", x-88, y+5, font_size+2,\
                        fillColor = (0,0,0))

        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            frequency = float(frequency)
            x, y = layout[node][n]
            o = event_size / 2.0  # half box size, used to centre the box

            if event == "loss":
                # draw boxes, frequencies of loss events
                # (the label is filled with loss_color, (1, 1, 1) by default)
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name, x - o,
                            y - o, font_size + 2, fillColor=loss_color)

            if event == "spec":
                # draw boxes, frequencies of speciation events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=(0, 0, 0), strokeColor=(0, 0, 0))
                canvas.text("{:.3f}".format(frequency) + node.name, x - o,
                            y - o, font_size + 2, fillColor=(0, 0, 0))

            if event == "dup":
                # draw boxes, frequencies of duplication events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name, x - o,
                            y - o, font_size + 2, fillColor=dup_color)

            elif event == "trans":
                # draw boxes, frequencies of transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name, x - o,
                            y - o, font_size + 2, fillColor=trans_color)

            elif event == "gtrans":
                # draw boxes, frequencies of guilty transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=gtrans_color,
                            strokeColor=gtrans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name, x - o,
                            y - o, font_size + 2, fillColor=gtrans_color)

    # draw tree leaves: label only the "gene" events of leaf nodes
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and brecon[node][n][1] == "gene":
                canvas.text(node.name, x + leaf_padding, y + font_size / 2.,
                            font_size + 2, fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def _boxplot_sister_species(spectree):
    """Return two parallel lists naming the children of every two-leaf node."""
    stree = treelib.read_tree(spectree)  # species tree
    species1 = []
    species2 = []
    for node in stree:
        if len(node.leaves()) == 2:
            species1.append(node.children[0].name)
            species2.append(node.children[1].name)
    return species1, species2


def _boxplot_pair_position(spcs, species1, species2):
    """Return the index of the pair with exactly one member in *spcs*.

    All pairs are scanned and the last match wins.  None is returned when
    no pair matches.
    """
    specPos = None
    for sp1, sp2 in zip(species1, species2):
        if sp1 in spcs and sp2 not in spcs:
            specPos = species1.index(sp1)
        elif sp2 in spcs and sp1 not in spcs:
            specPos = species2.index(sp2)
    return specPos


def boxPlot(x_axis = 'initialFreq',
            dataPath = '/home/muddcs15/research/work/hemiplasy/results/',
            prob1 = '0.001', prob2 = '0.01', prob3 = '0.05', prob4 = '0.1',
            prob5 = '0.5',
            spectree = '/home/muddcs15/research/work/hemiplasy/data/config/fungi.stree'):
    """Show box plots of hemiplasy probabilities.

    Parameters:
        x_axis: which figure to draw.
            'initialFreq' -- two plots against initial allele frequency:
                the running mean hemiplasy probability and the running
                fraction of trials where hemiplasy beat duploss.
            'pairs' -- one plot of the running mean grouped by species pair.
            'dupLocation' -- one plot per species pair, grouped by the
                duplication field of the events file.
        dataPath: directory holding the 'probabilities-<prob>.txt' files.
        prob1..prob5: initial-frequency strings used in the file names and
            as tick labels.  The 'pairs' and 'dupLocation' modes read only
            prob1..prob4.
        spectree: path of the species tree ('pairs' and 'dupLocation').

    Each probability line is '<fam> <duploss>,<hemiplasy> ...'; the family
    id is the first token with its first six characters removed.

    NOTE(review): the h/d counters are shared across all probability files
    and hList is shared across all lines of one file, so 'percent' and
    'ave' are running values.  That matches the code as found -- confirm
    it is intended.
    NOTE(review): this block was recovered from source that had lost its
    line structure.  The events file is read forward until the first line
    whose family id matches; that reading was chosen because it keeps the
    per-family bookkeeping aligned.
    """

    if x_axis == 'initialFreq':
        # Every line must split into six tab-separated fields; the values
        # themselves are not used in this mode.
        with open('/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.error.txt', 'r') as events:
            for line in events:
                famid, locus, spcs, gns, snode, lca = line.rstrip().split('\t')

        # define number of plots to be outputed
        fig, axes = plt.subplots(nrows=1, ncols=2)

        # identify the files for each of the different initial frequencies
        probLabels = [prob1, prob2, prob3, prob4, prob5]
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in probLabels]

        totalPerList = []  # probability of hemiplasy compared to duploss
        totalAList = []    # probability of hemiplasy ocurring
        h = 0  # trials where hemiplasy was more likely
        d = 0  # trials where duploss was at least as likely

        for probFilename in probsList:
            hList = []    # hemiplasy probabilities seen so far in this file
            perList = []  # running fraction with hemiplasy > duploss
            aveList = []  # running mean hemiplasy probability

            with open(probFilename, "r") as probFile:
                for line in probFile:
                    sepProbs = line.split()
                    sepProbs.pop(0)  # family id, not needed in this mode

                    for pair in sepProbs:
                        duploss, hemiplasy = map(float, pair.split(','))
                        hList.append(hemiplasy)
                        if hemiplasy > duploss:
                            h += 1
                        else:
                            d += 1

                    perList.append(float(h)/float(h+d))
                    aveList.append(stats.mean(hList))

            totalPerList.append(perList)
            totalAList.append(aveList)

        # define the first plot and its labels
        axes[0].boxplot(totalAList)
        axes[0].set_title('Probability of Hemiplasy')
        # Labels follow the prob arguments so they stay true to the files.
        axes[0].set_xticklabels(probLabels, minor=False)
        axes[0].set_xlabel('Initial Frequency')
        axes[0].set_ylabel('Probability')

        # define the second plot and its labels
        axes[1].boxplot(totalPerList)
        axes[1].set_title('Probability of Hemiplasy vs. DupLoss')
        axes[1].set_xticklabels(probLabels, minor=False)
        axes[1].set_xlabel('Initial Frequency')
        axes[1].set_ylabel('Probability')

        # print the plots
        plt.show()

    if x_axis == 'pairs':
        species1, species2 = _boxplot_sister_species(spectree)

        # identify the files for each of the different initial frequencies
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in (prob1, prob2, prob3, prob4)]

        totalPerList = []
        totalAList = []
        h = 0
        d = 0
        totalPairs = [[] for _ in range(5)]  # one list per species pair

        for probFilename in probsList:
            hList = []
            perList = []
            aveList = []

            with open('/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt', 'r') as events, \
                    open(probFilename, "r") as probFile:
                for line in probFile:
                    sepProbs = line.split()
                    fam = sepProbs.pop(0)
                    famid = fam[6:]

                    for pair in sepProbs:
                        duploss, hemiplasy = map(float, pair.split(','))
                        hList.append(hemiplasy)
                        if hemiplasy > duploss:
                            h += 1
                        else:
                            d += 1

                    percent = float(h)/float(h+d)
                    ave = stats.mean(hList)
                    perList.append(percent)
                    aveList.append(ave)

                    # Reset per family so a family without a matching event
                    # line cannot reuse the previous family's position.
                    specPos = None
                    for evline in events:
                        ev_famid, locus, spcs, gns, snode, lca = \
                            evline.rstrip().split('\t')
                        if famid == ev_famid:
                            specPos = _boxplot_pair_position(
                                spcs, species1, species2)
                            break

                    if specPos is not None and specPos < 5:
                        totalPairs[specPos].append(ave)

            totalPerList.append(perList)
            totalAList.append(aveList)

        plt.boxplot(totalPairs)
        plt.title('Hemiplasy by Pairs')
        plt.xlabel('Pair')
        plt.ylabel('Probability')

        # print the plots
        plt.show()

    if x_axis == 'dupLocation':
        # The tree used to be bound to ``tree`` while the following lines
        # read an undefined ``stree``; the helper uses one name throughout.
        species1, species2 = _boxplot_sister_species(spectree)

        # define number of plots to be outputed
        fig, axes = plt.subplots(nrows=2, ncols=3)

        # identify the files for each of the different initial frequencies
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in (prob1, prob2, prob3, prob4)]

        totalPerList = []
        totalAList = []
        h = 0
        d = 0
        # pairList[i] collects (duplication location, mean) for pair i
        pairList = [[] for _ in range(5)]
        totalFList = []
        totalPDList = []

        for probFilename in probsList:
            hList = []
            perList = []
            aveList = []
            famList = []
            PDList = []  # one (pair position, dup field) per family

            with open('/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt', 'r') as events, \
                    open(probFilename, "r") as probFile:
                for line in probFile:
                    sepProbs = line.split()
                    fam = sepProbs.pop(0)
                    famid = fam[6:]
                    famList.append(famid)

                    for pair in sepProbs:
                        duploss, hemiplasy = map(float, pair.split(','))
                        hList.append(hemiplasy)
                        if hemiplasy > duploss:
                            h += 1
                        else:
                            d += 1

                    perList.append(float(h)/float(h+d))
                    aveList.append(stats.mean(hList))

                    specPos = None
                    dup = None
                    for evline in events:
                        ev_famid, locus, spcs, gns, dup, lca = \
                            evline.rstrip().split('\t')
                        if famid == ev_famid:
                            specPos = _boxplot_pair_position(
                                spcs, species1, species2)
                            break
                    # Always append so PDList stays index-aligned with
                    # aveList; unmatched families carry position None.
                    PDList.append((specPos, dup))

            for famNum, (pos, dpl) in enumerate(PDList):
                if pos is not None and pos < 5:
                    pairList[pos].append((int(dpl), aveList[famNum]))

            totalPerList.append(perList)
            totalAList.append(aveList)
            totalFList.append(famList)
            totalPDList.append(PDList)

        # Regroup each pair's values by duplication location so that every
        # pair yields 13 lists (locations 1..13), one box each.
        finalPair = []
        for pairNum in pairList:
            byLocation = collections.defaultdict(list)
            for (dupLoc, prob) in pairNum:
                byLocation[dupLoc].append(prob)
            finalPair.append([byLocation[dupLoc] for dupLoc in range(1, 14)])

        # One subplot per pair, filling the 2x3 grid row by row.
        for idx, boxes in enumerate(finalPair):
            ax = axes[idx // 3, idx % 3]
            ax.boxplot(boxes)
            ax.set_title('Pair%d' % (idx + 1))
            ax.set_xlabel('Duplication Location')
            ax.set_ylabel('Probability')
            ax.set_ylim(0,0.25)

        # print the plots
        plt.show()
def midroot_recon(tree, stree, recon, events, params, generate):
    """Reposition the root along the edge joining its two children.

    For every leaf under each root child, the observed path length (summed
    ``dist`` values divided by *generate*) is compared with the expected
    length (``params[snode.name][0]`` summed along the species path given
    by *recon*).  The mean differences of the two sides set a midpoint,
    which is clamped to the root edge and written back to both children's
    ``dist``.

    The function modifies the two root children in place and returns None.
    """
    root = tree.root
    child_a, child_b = root.children
    leaves_a = child_a.leaves()
    leaves_b = child_b.leaves()

    def path_lengths(gene_node, gene_stop):
        # Expected length: walk up the species tree between the two
        # reconciled species nodes.
        expected = 0
        observed = 0
        snode = recon[gene_node]
        sstop = recon[gene_stop]
        while snode != sstop:
            expected += params[snode.name][0]
            snode = snode.parent
        # Observed length: walk up the gene tree itself.
        while gene_node != gene_stop:
            observed += gene_node.dist
            gene_node = gene_node.parent
        return expected, observed / generate

    def residuals(leaves, child):
        # Measure up to the root when it is a speciation, otherwise only up
        # to the child on this side.
        out = []
        for leaf in leaves:
            if events[root] == "spec":
                stop = root
            else:
                stop = child
            expected, observed = path_lengths(leaf, stop)
            out.append(observed - expected)
        return out

    resid_a = residuals(leaves_a, child_a)
    resid_b = residuals(leaves_b, child_b)

    edge_len = (child_a.dist + child_b.dist) / generate

    lower = child_a.dist - stats.mean(resid_a)
    upper = edge_len - child_b.dist + stats.mean(resid_b)

    split = util.clamp((lower + upper) / 2.0, 0, edge_len)

    child_a.dist = split * generate
    child_b.dist = (edge_len - split) * generate
def boxPlot(dataPath = '/home/muddcs15/research/work/hemiplasy/results/',
            prob1 = '0.001', prob2 = '0.05', prob3 = '0.1', prob4 = '0.5'):
    """Show box plots of hemiplasy probability against initial frequency.

    Two plots are drawn side by side: the running mean hemiplasy
    probability, and the running fraction of trials in which hemiplasy was
    more likely than duploss.

    Parameters:
        dataPath: directory holding the 'probabilities-<prob>.txt' files.
        prob1..prob4: initial-frequency strings; they select the files and
            label the x axis.

    Each input line is '<fam> <duploss>,<hemiplasy> ...'.  Blank lines are
    skipped.

    NOTE(review): the h/d counters are shared across all files and hList is
    shared across all lines of one file, so both plotted quantities are
    running values.  That matches the code as found -- confirm it is
    intended.
    """
    # define number of plots to be outputed
    fig, axes = plt.subplots(nrows=1, ncols=2)

    # identify the files for each of the different initial frequencies
    probLabels = [prob1, prob2, prob3, prob4]
    probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                 for p in probLabels]

    totalPerList = []  # probability of hemiplasy compared to duploss
    totalAList = []    # probability of hemiplasy ocurring
    h = 0  # trials where hemiplasy was more likely
    d = 0  # trials where duploss was at least as likely

    # open each probability file
    for probFilename in probsList:
        hList = []    # hemiplasy probabilities seen so far in this file
        perList = []  # running fraction with hemiplasy > duploss
        aveList = []  # running mean hemiplasy probability

        # Context manager so the file is closed even if a line fails to parse.
        with open(probFilename, "r") as probFile:
            for line in probFile:
                sepProbs = line.split()
                if not sepProbs:
                    # blank line: nothing to parse
                    continue
                sepProbs.pop(0)  # leading family id is not used here

                # get the probability of duploss and hemiplasy for each trial
                for pair in sepProbs:
                    duploss, hemiplasy = map(float, pair.split(','))
                    hList.append(hemiplasy)
                    if hemiplasy > duploss:
                        h += 1
                    else:
                        d += 1

                perList.append(float(h)/float(h+d))
                aveList.append(stats.mean(hList))

        totalPerList.append(perList)
        totalAList.append(aveList)

    # define the first plot and its labels
    axes[0].boxplot(totalAList)
    axes[0].set_title('Probability of Hemiplasy')
    # Tick labels come from the arguments so they match the files read.
    axes[0].set_xticklabels(probLabels, minor=False)
    axes[0].set_xlabel('Initial Frequency')
    axes[0].set_ylabel('Probability')

    # define the second plot and its labels
    axes[1].boxplot(totalPerList)
    axes[1].set_title('Probability of Hemiplasy vs. DupLoss')
    axes[1].set_xticklabels(probLabels, minor=False)
    axes[1].set_xlabel('Initial Frequency')
    axes[1].set_ylabel('Probability')

    # print the plots
    plt.show()
def draw_tree(tree, brecon, stree,
              xscale=100, yscale=100,
              leaf_padding=10,
              label_size=None, label_offset=None,
              font_size=12, stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=100, tmargin=100, bmargin=100,
              tree_color=(0, 0, 0), tree_trans_color=(0, 0, 0),
              stree_color=(.3, .7, .3), snode_color=(.2, .2, .7),
              loss_color = (1,1,1), loss_color_border=(.5,.5,.5),
              dup_color=(0, 0, 1), dup_color_border=(0, 0, 1),
              trans_color=(1, 1, 0), trans_color_border=(.5, .5, 0),
              gtrans_color=(1,0,0), gtrans_color_border=(.5,0,0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"
              ):
    '''Draw a reconciliation of a parasite tree on a host tree as SVG.

    Parameters:
        tree: the parasite tree being drawn.
        brecon: mapping from each node of `tree` to a sequence of
            (snode, event, frequency) triples.  The event strings tested
            here are "spec", "gene", "loss", "dup", "trans" and "gtrans".
            NOTE: this mapping is modified in place (brecon[node] is
            replaced by the result of orderLoss).
        stree: the host tree, laid out with treelib1.layout_tree.
        xscale, yscale: scale factors handed to the host-tree layout.
        canvas: an existing SVG canvas; when None a new svg.Svg is opened
            on `filename`.
        autoclose: when true, endStyle()/endSvg() are called before
            returning.
        *_color, *_color_border, event_size, font sizes, margins: drawing
            attributes for the tree, the host tree and the event boxes.
        snames: mapping from host leaf name to displayed label; defaults to
            the identity mapping over stree.leaf_names().
        rootlen: length of the stub drawn before the host root; defaults to
            a tenth of the largest x in the host layout.

    Returns:
        The canvas that was drawn on.

    NOTE(review): the source this block was recovered from had lost its
    line structure; nesting was reconstructed from the syntax and two
    placements are flagged below.
    '''

    # set defaults
    font_ratio = 8. / 11.

    # label_size is computed here but not read again in this function.
    if label_size is None:
        label_size = .7 * font_size

    # legend_scale and minlen are assigned but not read again below.
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout: add a pseudo-parent (key None) left of the root, then
    # shift every entry right by rootlen and up by half a y unit.
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    ylists = defaultdict(lambda: [])  # snode -> nodes stacked on it
    yorders = {}                      # node -> one y rank per event

    # layout speciations and genes (y)
    for node in tree.preorder():
        # Remember the name of the first preorder node (the root).  The
        # preorder list is rebuilt on every iteration just for this test.
        if node == list(tree.preorder())[0]:
            rootNode = node.name
        yorders[node] = []
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                yorders[node].append(len(ylists[snode]))
                ylists[snode].append(node)

    # layout dups and transfers (y)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # Find number of nodes on a single branch for y-coord:
                # average the ranks of the children whose last event sits
                # on the same host node.
                v = [yorders[child]
                     for child in node.children
                     if brecon[child][-1][0] == snode]
                if len(v) == 0:
                    yorders[node].append(0)
                else:
                    yorders[node].append(stats.mean(flatten(v)))

    # layout node (x)
    xorders = {}  # records number of nodes on a single branch for x-coord
    branchFrac = {}  # records the placement of a node on a branch
    for node in tree.postorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen at host vertices
                if not node in branchFrac:
                    branchFrac[node] = 0
            else:
                # Transfers and duplications occur on branches
                v = [branchFrac[child] for child in node.children]
                if len(v) == 0:
                    branchFrac[node] = 1
                else:
                    branchFrac[node] = max(v) + 1

    for node in tree.preorder():
        xorders[node] = []
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen on vertices, not
                # branches
                xorders[node].append(0)
            else:
                if node.parent and containsTransOrDup(node.parent, brecon):
                    # set branchFrac to the branch Frac of the parent, they
                    # are on the same branch
                    branchFrac[node] = branchFrac[node.parent]
                if containsLoss(node, brecon):
                    # if following a loss, first transfer/duplication event
                    # on branch
                    xorders[node].append(1)
                elif not node.parent:
                    # Root of tree
                    xorders[node].append(0)
                else:
                    xorders[node].append(maxList(xorders[node.parent])+1)

    # setup layout: node -> list of (x, y), one entry per event.  The None
    # key stands for the parent of the tree root.
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree.preorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            nx, ny = slayout[snode]
            px, py = slayout[snode.parent]
            (npx, npy) = layout[node.parent][-1]

            # set spacing between nodes on the same branch: shrink the step
            # in units of 5 until branchFrac steps fit in the branch length
            frac = 50
            while branchFrac[node] * frac >= nx - px:
                frac = frac - 5

            # calc x
            if event == "trans" or event == "gtrans":
                if npx > px:
                    # transfer parent is farther forward in time than host
                    # parent
                    x = npx + frac
                else:
                    x = px + frac
            elif event =="dup":
                x = px + frac
            else:
                x = nx

            # calc y: interpolate along the host branch, then offset by the
            # event's rank within the branch width (frac is reused here)
            deltay = ny - py
            slope = deltay / float(nx-px)
            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2
            frac = (yorders[node][n] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout:
                layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]

        # NOTE(review): the two reorderings and the debug dump below are
        # placed at per-node level (after all events of the node are laid
        # out); confirm against the original file.
        # order brecon nodes temporally
        brecon[node] = orderLoss(node, brecon, layout)
        # order layout nodes temporally
        layout[node] = orderLayout(node, layout)

        # Debug output when a y coordinate falls well below the host layout.
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    max_label_size = max(len(x.name) for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size

    # The string literal below is disabled code kept from an earlier version.
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''

    xcoords, ycoords = zip(* slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        # With the declared default autoclose=True these None checks only
        # fire when the caller passes autoclose=None explicitly.
        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y+stree_font_size/2., stree_font_size,
                        fillColor=snode_color)

    # draw tree: connect every event to the event that precedes it
    for node in tree:
        containsL= containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if containsL == False:
                # no loss event
                px, py = layout[node.parent][-1]
            else:
                # loss event present
                if n == 0:
                    # event is loss
                    px, py = layout[node.parent][-1]
                else:
                    # event stems from loss
                    px, py = layout[node][n-1]

            trans = False
            if node.parent:
                snode, event, frequency = brecon[node][n]
                if n == 0:
                    psnode, pevent, pfrequency = brecon[node.parent][-1]
                # Event stemming from a loss event
                else:
                    psnode, pevent, pfrequency = brecon[node][n-1]
                # A transfer edge is one whose preceding event is a transfer
                # located on a different host node.
                if pevent == "trans" or pevent == "gtrans":
                    if psnode != snode:
                        trans = True
                else:
                    trans = False

            if not trans:
                canvas.line(x, y, px, py, color=tree_color)
            # draw the transfer dashed line
            else:
                arch = 20
                # Both control points are the midpoint shifted left by arch.
                x2 = (x*.5 + px*.5) - arch
                y2 = (y*.5 + py*.5)
                x3 = (x*.5 + px*.5) - arch
                y3 = (y*.5 + py*.5)
                # draw regular transfer dashed line
                if pevent == "trans":
                    canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                                 (x, y, x2, y2, x3, y3, px, py,
                                  " style='stroke-dasharray: 4, 2' " +
                                  svg.colorFields(tree_trans_color, (0,0,0,0))))
                # draw guilty transfer dashed line
                else:
                    canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                                 (x, y, x2, y2, x3, y3, px, py,
                                  " style='stroke-dasharray: 4, 2' " +
                                  svg.colorFields(gtrans_color, (0,0,0,0))))

    # draw events
    for node in tree:
        # Arrow-shaped marker and caption pointing at the root's first event.
        if node.name == rootNode:
            x, y = layout[node][0]
            canvas.polygon((x-20, y, x-50, y+30,x-50, y+15, x-90, y+15, x-90,\
                            y-15, x-50, y-15, x-50, y-30), strokeColor = (1,.7,.3), \
                           fillColor = (1,.7,.3))
            canvas.text("Root Node", x-88, y+5, font_size+2,\
                        fillColor = (0,0,0))

        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            frequency = float(frequency)
            x, y = layout[node][n]
            o = event_size / 2.0  # half box size, used to centre the box

            if event == "loss":
                # draw boxes, frequencies of loss events
                # (the label is filled with loss_color, (1,1,1) by default)
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o,
                            font_size+2, fillColor = loss_color)

            if event == "spec":
                # draw boxes, frequencies of speciation events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=(0,0,0),
                            strokeColor=(0,0,0))
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o,
                            font_size+2, fillColor = (0,0,0))

            if event == "dup":
                # draw boxes, frequencies of duplication events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o,
                            font_size+2, fillColor=dup_color)

            elif event == "trans":
                # draw boxes, frequencies of transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o,
                            font_size+2, fillColor=trans_color)

            elif event == "gtrans":
                # draw boxes, frequencies of guilty transfer events
                canvas.rect(x-o, y-o, event_size, event_size,
                            fillColor=gtrans_color,
                            strokeColor=gtrans_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o,
                            font_size+2, fillColor=gtrans_color)

    # draw tree leaves: label only the "gene" events of leaf nodes
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and brecon[node][n][1] == "gene":
                canvas.text(node.name,
                            x + leaf_padding, y+font_size/2., font_size+2,
                            fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def boxPlot(dataPath='/home/muddcs15/research/work/hemiplasy/results/',
            prob1='0.001', prob2='0.05', prob3='0.1', prob4='0.5',
            spectree='/home/muddcs15/research/work/hemiplasy/data/config/fungi.stree',
            eventsFile='/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt'):
    """Show a box plot of hemiplasy probability grouped by sister-species pair.

    For each of the four initial frequencies, the file
    ``<dataPath>/probabilities-<prob>.txt`` is read.  Every line holds a
    family token (its id is the token with the first six characters removed)
    followed by whitespace-separated ``duploss,hemiplasy`` float pairs.  For
    each family the mean hemiplasy probability is computed and, for every
    event of that family listed in ``eventsFile``, appended to the box of the
    species pair in which exactly one of the two sister species is named in
    the event's species field.

    Args:
        dataPath: directory containing the ``probabilities-*.txt`` files.
        prob1, prob2, prob3, prob4: initial-frequency strings used to build
            the probability file names.
        spectree: path of the species tree; every node with exactly two
            leaves contributes one (first child, second child) species pair.
        eventsFile: tab-separated file with six columns per line
            (family id, locus, species, genes, species node, lca).  Defaults
            to the path that used to be hard-coded in this function.

    Returns:
        None.  The plot is displayed with ``plt.show()``.
    """
    # The plot has always consisted of exactly five boxes; pairs beyond the
    # fifth are ignored, as before.
    numBoxes = 5

    # collect the sister-species pairs from the species tree
    stree = treelib.read_tree(spectree)
    species1 = []
    species2 = []
    for node in stree:
        if len(node.leaves()) == 2:
            species1.append(node.children[0].name)
            species2.append(node.children[1].name)

    # Read the events once and index them by family id.  Previously a single
    # open file handle was iterated inside the per-family loop, so it was
    # exhausted after the first family and later families never matched.
    eventsByFam = {}
    with open(eventsFile, 'r') as events:
        for line in events:
            ev_famid, locus, spcs, gns, snode, lca = \
                line.rstrip().split('\t')
            eventsByFam.setdefault(ev_famid, []).append(spcs)

    pairs = [[] for _ in range(numBoxes)]

    for prob in (prob1, prob2, prob3, prob4):
        probFilename = os.path.join(dataPath,
                                    'probabilities-' + prob + '.txt')

        # NOTE(review): hList is reset per probability file, not per family,
        # so `ave` below is the running mean over every family read so far
        # from this file.  Kept as in the original -- confirm it is intended.
        hList = []

        with open(probFilename, 'r') as probFile:
            for line in probFile:
                sepProbs = line.split()
                if not sepProbs:
                    # blank line: nothing to parse
                    continue
                famid = sepProbs.pop(0)[6:]

                for pair in sepProbs:
                    _duploss, hemiplasy = map(float, pair.split(','))
                    hList.append(hemiplasy)
                ave = stats.mean(hList)

                for spcs in eventsByFam.get(famid, ()):
                    # Reset for every event so that a value left over from a
                    # previous event is never reused.
                    specPos = None
                    # NOTE(review): only the second branch stops the search,
                    # mirroring the original control flow; a later pair can
                    # therefore override an earlier first-branch match.
                    for pos, (sp1, sp2) in enumerate(zip(species1, species2)):
                        if sp1 in spcs and sp2 not in spcs:
                            specPos = pos
                        elif sp2 in spcs and sp1 not in spcs:
                            specPos = pos
                            break
                    if specPos is not None and specPos < numBoxes:
                        pairs[specPos].append(ave)

    plt.boxplot(pairs)
    plt.title('Hemiplasy by Pairs')
    plt.xlabel('Pair')
    plt.ylabel('Probability')

    # print the plots
    plt.show()
def mleNormal(lens):
    """Return ``(mu, sigma)`` for the sample ``lens``.

    ``mu`` comes from ``stats.mean`` and ``sigma`` from ``stats.sdev``; both
    are evaluated on ``lens`` in that order.
    """
    return stats.mean(lens), stats.sdev(lens)
def draw_tree(tree, brecon, stree,
              xscale=100, yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=10, tmargin=0, bmargin=0,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.6, .3, .8),
              snode_color=(.2, .2, .7),
              loss_color=(1, 1, 1),
              loss_color_border=(.5, .5, .5),
              dup_color=(1, 0, 0),
              dup_color_border=(.5, 0, 0),
              trans_color=(0, 1, 0),
              trans_color_border=(0, .5, 0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"):
    """Draw a gene tree reconciled inside a species tree as SVG.

    ``brecon[node]`` is iterated as a list of ``(snode, event, frequency)``
    triples, so a gene-tree node may carry several events.  The event
    strings tested here are "spec", "gene", "loss", "dup" and "trans".  One
    (x, y) position is stored per event in ``layout[node]``; edges, event
    markers and the ``frequency`` text are then drawn per event.

    Side effects:
        * ``brecon[node]`` is replaced by the result of ``orderLoss`` for
          every node of ``tree`` (the caller's mapping is modified).
        * Debugging output is printed to stdout.

    Args:
        tree: gene tree (iterated, and traversed pre- and post-order).
        brecon: mapping from gene-tree node to its event list (see above).
        stree: species tree, laid out with ``treelib1.layout_tree``.
        xscale, yscale: scales passed to the species-tree layout.
        leaf_padding: horizontal gap before leaf labels.
        label_size, label_offset: accepted but not used for drawing here.
        font_size, stree_font_size: font sizes of gene / species labels.
        canvas: existing SVG canvas; when None a new one writing to
            ``filename`` is created.
        autoclose: when true, the style and SVG elements are closed before
            returning.
        rmargin, lmargin, tmargin, bmargin: margins added around the drawing.
        *_color, *_color_border: fill / stroke colors of the tree elements.
        event_size: side length of the event squares.
        snames: mapping from species leaf name to displayed name; defaults
            to the identity mapping over ``stree.leaf_names()``.
        rootlen: length of the root branch; defaults to a tenth of the
            largest x coordinate of the species layout.
        stree_width: fraction of ``yscale`` used to spread gene lineages
            inside a species branch.
        filename: output stream or path used when ``canvas`` is None.

    Returns:
        The canvas that was drawn on.

    NOTE(review): the nesting of a few statements (marked below) was
    reconstructed from statement order; confirm against the intended
    control flow.
    """

    # set defaults
    font_ratio = 8. / 11.

    if label_size is None:
        label_size = .7 * font_size

    #if label_offset is None:
    #    label_offset = -1

    # legend_scale and minlen are assigned but not read again in this function
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout
    # the None key is the position of the start of the root branch; every
    # entry is then shifted right by rootlen and up by half a y unit
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    ylists = defaultdict(lambda: [])
    yorders = {}

    # layout speciations and genes (y)
    # each spec/gene/loss event takes the next free slot in its species branch
    for node in tree.preorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                yorders[node] = len(ylists[snode])
                ylists[snode].append(node)

    # layout dups and transfers (y)
    # placed at the mean slot of the children that end in the same species
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                v = [yorders[child]
                     for child in node.children
                     if brecon[child][-1][0] == snode]
                if len(v) == 0:
                    yorders[node] = 0
                else:
                    yorders[node] = stats.mean(v)

    # layout node (x)
    xorders = {}
    xmax = defaultdict(lambda: 0)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                xorders[node] = 0
            else:
                v = [xorders[child]
                     for child in node.children
                     if brecon[child][-1][0] == snode]
                if len(v) == 0:
                    xorders[node] = 1
                else:
                    xorders[node] = max(v) + 1
            # NOTE(review): assumed to run once per event; confirm nesting
            xmax[snode] = max(xmax[snode], xorders[node])

    # setup layout
    # layout maps a node to a list of (x, y), one entry per event of the node
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree:
        for ev in brecon[node]:
            snode, event, frequency = ev
            nx, ny = slayout[snode]
            px, py = slayout[snode.parent]

            # calc x
            frac = (xorders[node]) / float(xmax[snode] + 1)
            deltax = nx - px
            x = nx - frac * deltax

            # calc y
            deltay = ny - py
            slope = deltay / float(deltax)

            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2

            frac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout:
                layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]

        # NOTE(review): assumed to run once per node, after all of its
        # events have been positioned; confirm nesting
        brecon[node] = orderLoss(node, brecon, layout)
        print "Brecon = ", brecon[node]
        layout[node] = orderLayout(node, layout)
        print "Layout = ", layout[node]

        # debugging dump for positions far below the species tree
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    max_label_size = max(len(x.name)
                         for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size

    # the string below is disabled code kept as a bare string expression
    ''' if colormap == None: for node in tree: node.color = (0, 0, 0) else: colormap(tree) if stree and gene2species: recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) losses = phylo.find_loss(tree, stree, recon) else: events = None losses = None # layout tree if layout is None: coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen) else: coords = layout '''

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # draw tree
    for node in tree:
        containsL = containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            # print brecon[node]
            x, y = layout[node][n]
            # print layout[node]

            # pick the point this event is connected to: the parent's last
            # event, or the node's own previous event
            if containsL == False:
                px, py = layout[node.parent][-1]
            else:
                if brecon[node][n][1] == "loss":
                    px, py = layout[node.parent][-1]
                else:
                    px, py = layout[node][n - 1]

            trans = False

            # the edge is a transfer when, walking up from an event's species
            # node, the parent's final species node is never reached
            # NOTE(review): the walk is assumed to run for every event of the
            # node; confirm nesting
            if node.parent:
                for ev in brecon[node]:
                    snode, event, frequency = ev
                    psnode = brecon[node.parent][-1][0]
                    while snode:
                        if psnode == snode:
                            break
                        snode = snode.parent
                    else:
                        trans = True

            if not trans:
                canvas.line(x, y, px, py, color=tree_color)
            else:
                # dashed cubic curve; both control points coincide at the
                # edge midpoint shifted left by `arch`
                arch = 20
                x2 = (x * .5 + px * .5) - arch
                y2 = (y * .5 + py * .5)
                x3 = (x * .5 + px * .5) - arch
                y3 = (y * .5 + py * .5)

                canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                             (x, y, x2, y2,
                              x3, y3, px, py,
                              " style='stroke-dasharray: 4, 2' " +
                              svg.colorFields(tree_trans_color, (0, 0, 0, 0))))

    # draw events
    for node in tree:
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            x, y = layout[node][n]
            o = event_size / 2.0

            if event == "loss":
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                # loss frequency text uses a fixed white fill
                canvas.text(frequency, x - o, y - o, font_size,
                            fillColor=(1, 1, 1))
            if event == "spec":
                # speciation frequency is written at the species node
                # position rather than at the gene node
                canvas.text(frequency,
                            slayout[snode][0] - leaf_padding / 2,
                            slayout[snode][1] - font_size, font_size,
                            fillColor=(0, 0, 0))
            if event == "dup":
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text(frequency, x - o, y - o, font_size,
                            fillColor=dup_color)
            elif event == "trans":
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text(frequency, x - o, y - o, font_size,
                            fillColor=trans_color)

    # draw tree leaves
    # leaves whose event list contains a loss are not labelled
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and containsLoss(node, brecon) == False:
                canvas.text(node.name,
                            x + leaf_padding, y + font_size / 2., font_size,
                            fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def draw_tree(tree, brecon, stree,
              xscale=100, yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=10, tmargin=0, bmargin=0,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.4, .4, 1),
              snode_color=(.2, .2, .7),
              dup_color=(1, 0, 0),
              dup_color_border=(.5, 0, 0),
              trans_color=(0, 1, 0),
              trans_color_border=(0, .5, 0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"):
    """Render a gene tree embedded in its species tree as SVG.

    Only the last entry of ``brecon[node]`` is consulted; it is unpacked as
    an ``(snode, event)`` pair.  Nodes whose event is "spec" or "gene" get
    consecutive slots inside their species branch; every other node is
    placed relative to its children that lie in the same species branch.
    "dup" and "trans" nodes are marked with colored squares, and an edge
    whose child species is not at or below the parent's species is drawn as
    a dashed curve.

    Args:
        tree: gene tree.
        brecon: mapping from gene-tree node to its list of events.
        stree: species tree, laid out with ``treelib.layout_tree``.
        xscale, yscale: scales handed to the species-tree layout.
        leaf_padding: horizontal gap before leaf labels.
        label_size, label_offset: accepted but not used for drawing here.
        font_size, stree_font_size: gene / species label font sizes.
        canvas: SVG canvas to draw on; a new one writing to ``filename`` is
            created when None.
        autoclose: close the style and SVG elements before returning.
        rmargin, lmargin, tmargin, bmargin: margins around the drawing.
        *_color, *_color_border: fill / stroke colors.
        event_size: side length of the event squares.
        snames: species leaf name -> displayed name (identity by default).
        rootlen: root branch length (a tenth of the widest x by default).
        stree_width: fraction of ``yscale`` used to spread lineages.
        filename: output used when ``canvas`` is None.

    Returns:
        The canvas that was drawn on.
    """
    # multiplied by a font size and a character count to estimate label width
    glyph_ratio = 8. / 11.

    if label_size is None:
        label_size = .7 * font_size

    # these two flags are set for an all-zero-length tree but never read here
    if sum(gnode.dist for gnode in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    if snames is None:
        snames = dict((name, name) for name in stree.leaf_names())

    # ---- species tree layout -------------------------------------------
    slayout = treelib.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(pos[0] for pos in slayout.values())

    # add the start of the root branch under the key None, then shift every
    # position right by rootlen and up by half a y unit
    root_x, root_y = slayout[stree.root]
    slayout[None] = (root_x - rootlen, root_y)
    for skey, (sx, sy) in slayout.items():
        slayout[skey] = (sx + rootlen, sy - .5 * yscale)

    # ---- vertical order of gene nodes inside each species branch -------
    ylists = defaultdict(list)
    yorders = {}

    # speciations and genes take the next free slot, in preorder
    for node in tree.preorder():
        snode, event = brecon[node][-1]
        if event in ("spec", "gene"):
            yorders[node] = len(ylists[snode])
            ylists[snode].append(node)

    # everything else sits at the mean slot of its same-branch children
    for node in tree.postorder():
        snode, event = brecon[node][-1]
        if event not in ("spec", "gene"):
            child_slots = [yorders[child]
                           for child in node.children
                           if brecon[child][-1][0] == snode]
            yorders[node] = stats.mean(child_slots) if child_slots else 0

    # ---- horizontal order: steps above the deepest same-branch child ---
    xorders = {}
    xmax = defaultdict(int)
    for node in tree.postorder():
        snode, event = brecon[node][-1]
        if event in ("spec", "gene"):
            step = 0
        else:
            child_steps = [xorders[child]
                           for child in node.children
                           if brecon[child][-1][0] == snode]
            step = max(child_steps) + 1 if child_steps else 1
        xorders[node] = step
        xmax[snode] = max(xmax[snode], step)

    # ---- turn the orders into coordinates ------------------------------
    layout = {None: slayout[brecon[tree.root][-1][0].parent]}
    for node in tree:
        snode = brecon[node][-1][0]
        nx, ny = slayout[snode]
        px, py = slayout[snode.parent]

        # x: move back from the species node along its branch
        xfrac = (xorders[node]) / float(xmax[snode] + 1)
        deltax = nx - px
        x = nx - xfrac * deltax

        # y: follow the branch slope, then offset by the node's slot
        slope = (ny - py) / float(deltax)
        offset = py + slope * (x - px)
        yfrac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
        y = offset + (yfrac - .5) * stree_width * yscale

        layout[node] = (x, y)

        # debugging dump for positions far below the species tree
        if y > max(pos[1] for pos in slayout.values()) + 50:
            print("%s %s" % (nx, ny))
            print("%s %s" % (px, py))
            print("%s %s" % (offset, yfrac))
            print("%s %s" % (ylists[snode], yorders[node]))
            print(brecon[node])
            print("%s %s %s" % (node, snode, layout[node]))

    # ---- drawing extents -----------------------------------------------
    max_label_size = max(len(leaf.name)
                         for leaf in tree.leaves()) * glyph_ratio * font_size
    max_slabel_size = max(len(leaf.name)
                          for leaf in stree.leaves()) \
        * glyph_ratio * stree_font_size

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # ---- canvas --------------------------------------------------------
    made_canvas = canvas is None
    if made_canvas:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

    # an unspecified autoclose follows whether the canvas was created here
    if autoclose is None:
        autoclose = made_canvas

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # species leaf labels, placed to the right of the gene leaf labels
    for snode in stree:
        sx, sy = slayout[snode]
        if snode.is_leaf():
            canvas.text(snames[snode.name],
                        sx + leaf_padding + max_label_size,
                        sy + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # gene tree edges
    for node in tree:
        x, y = layout[node]
        px, py = layout[node.parent]

        # transfer: climbing from the child's species never meets the
        # parent's species
        is_transfer = False
        if node.parent:
            psnode = brecon[node.parent][-1][0]
            ancestor = brecon[node][-1][0]
            while ancestor and not (psnode == ancestor):
                ancestor = ancestor.parent
            if not ancestor:
                is_transfer = True

        if is_transfer:
            # dashed cubic curve whose two control points coincide at the
            # edge midpoint shifted left by `arch`
            arch = 20
            ctrl_x = (x * .5 + px * .5) - arch
            ctrl_y = (y * .5 + py * .5)
            dash_style = (" style='stroke-dasharray: 4, 2' " +
                          svg.colorFields(tree_trans_color, (0, 0, 0, 0)))
            canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                         (x, y, ctrl_x, ctrl_y,
                          ctrl_x, ctrl_y, px, py,
                          dash_style))
        else:
            canvas.line(x, y, px, py, color=tree_color)

    # event markers
    marker_colors = {
        "dup": (dup_color, dup_color_border),
        "trans": (trans_color, trans_color_border),
    }
    for node in tree:
        snode, event = brecon[node][-1]
        x, y = layout[node]
        half = event_size / 2.0
        if event in marker_colors:
            fill, border = marker_colors[event]
            canvas.rect(x - half, y - half, event_size, event_size,
                        fillColor=fill,
                        strokeColor=border)

    # gene leaf labels
    for node in tree:
        x, y = layout[node]
        if node.is_leaf():
            canvas.text(node.name,
                        x + leaf_padding, y + font_size / 2., font_size,
                        fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def draw_tree(tree, stree, extra,
              xscale=100, yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=10, tmargin=0, bmargin=0,
              stree_color=(.4, .4, 1),
              snode_color=(.2, .2, .7),
              event_size=10,
              rootlen=None,
              stree_width=.8,
              filename=sys.stdout,
              labels=None,
              slabels=None):
    """Render a gene tree inside its species tree as SVG, colored by locus.

    ``extra`` supplies three entries: "species_map" (gene node -> species
    node), "locus_map" (gene node -> locus) and "order".  Each distinct
    locus gets its own color from a rainbow color map; an edge takes the
    color of its parent's locus, and a square in the child's locus color
    marks every node whose locus differs from its parent's.

    Args:
        tree: gene tree.
        stree: species tree, laid out with ``treelib.layout_tree``.
        extra: dictionary with the three entries described above.
        xscale, yscale: scales handed to the species-tree layout.
        leaf_padding: horizontal gap before leaf labels.
        label_size: font size of the extra node labels (0.7 * font_size
            when None).
        label_offset: accepted but not used here.
        font_size, stree_font_size: gene / species label font sizes.
        canvas: SVG canvas to draw on; a new one writing to ``filename`` is
            created when None.
        autoclose: close the style and SVG elements before returning.
        rmargin, lmargin, tmargin, bmargin: margins around the drawing.
        stree_color, snode_color: species tree colors.
        event_size: side length of the locus-change squares.
        rootlen: root branch length (a tenth of the widest x by default).
        stree_width: fraction of ``yscale`` used to spread lineages.
        filename: output used when ``canvas`` is None.
        labels: gene node name -> text drawn at that node.
        slabels: passed through to ``draw_stree``.

    Returns:
        The canvas that was drawn on.
    """
    recon = extra["species_map"]
    loci = extra["locus_map"]
    # looked up (so a missing key still raises) but not used by this layout
    order = extra["order"]

    # one color per distinct locus, in sorted locus order
    distinct_loci = sorted(set(loci.values()))
    locus_count = len(distinct_loci)
    palette = util.rainbow_color_map(low=0, high=locus_count - 1)
    locus_color = dict((locus, palette.get(index))
                       for index, locus in enumerate(distinct_loci))

    # multiplied by a font size and a character count to estimate label width
    glyph_ratio = 8. / 11.

    if label_size is None:
        label_size = .7 * font_size

    # these two flags are set for an all-zero-length tree but never read here
    if sum(gnode.dist for gnode in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    snames = dict((name, name) for name in stree.leaf_names())

    if labels is None:
        labels = {}
    if slabels is None:
        slabels = {}

    # ---- species tree layout -------------------------------------------
    slayout = treelib.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(pos[0] for pos in slayout.values())

    # add the start of the root branch under the key None, then shift every
    # position right by rootlen and up by half a y unit
    root_x, root_y = slayout[stree.root]
    slayout[None] = (root_x - rootlen, root_y)
    for skey, (sx, sy) in slayout.items():
        slayout[skey] = (sx + rootlen, sy - .5 * yscale)

    # ---- vertical order of gene nodes inside each species branch -------
    ylists = defaultdict(list)
    yorders = {}

    events = phylo.label_events(tree, recon)

    # speciations and genes take the next free slot, in preorder
    for node in tree.preorder():
        snode = recon[node]
        if events[node] in ("spec", "gene"):
            yorders[node] = len(ylists[snode])
            ylists[snode].append(node)

    # every other node sits at the mean slot of its children
    for node in tree.postorder():
        if events[node] not in ("spec", "gene"):
            yorders[node] = stats.mean(
                [yorders[child] for child in node.children])

    # ---- horizontal order: steps above the deepest child ---------------
    xorders = {}
    xmax = defaultdict(int)
    for node in tree.postorder():
        snode = recon[node]
        if events[node] in ("spec", "gene"):
            step = 0
        else:
            step = max([xorders[child] for child in node.children]) + 1
        xorders[node] = step
        xmax[snode] = max(xmax[snode], step)

    # ---- turn the orders into coordinates ------------------------------
    layout = {None: slayout[None]}
    for node in tree:
        snode = recon[node]
        nx, ny = slayout[snode]
        px, py = slayout[snode.parent]

        # x: move back from the species node along its branch
        xfrac = (xorders[node]) / float(xmax[snode] + 1)
        deltax = nx - px
        x = nx - xfrac * deltax

        # y: follow the branch slope, then offset by the node's slot
        slope = (ny - py) / float(deltax)
        offset = py + slope * (x - px)
        yfrac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
        y = offset + (yfrac - .5) * stree_width * yscale

        layout[node] = (x, y)

    # ---- drawing extents -----------------------------------------------
    max_label_size = max(len(leaf.name)
                         for leaf in tree.leaves()) * glyph_ratio * font_size
    max_slabel_size = max(len(leaf.name)
                          for leaf in stree.leaves()) \
        * glyph_ratio * stree_font_size

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # ---- canvas --------------------------------------------------------
    made_canvas = canvas is None
    if made_canvas:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

    # an unspecified autoclose follows whether the canvas was created here
    if autoclose is None:
        autoclose = made_canvas

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color,
               slabels=slabels)

    # species leaf labels, placed to the right of the gene leaf labels
    for snode in stree:
        sx, sy = slayout[snode]
        if snode.is_leaf():
            canvas.text(snames[snode.name],
                        sx + leaf_padding + max_label_size,
                        sy + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # gene tree edges, colored by the locus of the parent (the root edge
    # uses the root's own locus)
    for node in tree:
        x, y = layout[node]
        px, py = layout[node.parent]
        color_source = node.parent if node.parent else tree.root
        canvas.line(x, y, px, py, color=locus_color[loci[color_source]])

    # gene leaf names and optional per-node labels
    for node in tree:
        x, y = layout[node]

        if node.is_leaf():
            canvas.text(node.name,
                        x + leaf_padding, y + font_size / 2., font_size,
                        fillColor=(0, 0, 0))

        if node.name in labels:
            canvas.text(labels[node.name],
                        x, y, label_size,
                        fillColor=(0, 0, 0))

    # squares where the locus changes between parent and child
    for node in tree:
        if not node.parent:
            continue
        locus = loci[node]
        if locus != loci[node.parent]:
            marker_color = locus_color[locus]
            x, y = layout[node]
            half = event_size / 2.0
            canvas.rect(x - half, y - half, event_size, event_size,
                        fillColor=marker_color,
                        strokeColor=marker_color)

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas