Beispiel #1
0
def learnModel(trees, stree, gene2species, statsprefix="", filenames=None):
    """Fit per-branch rate parameters and a base-rate distribution from trees.

    Arguments:
      trees        -- gene trees; each exposes ``nodes`` (a dict of nodes
                      carrying ``dist``).
      stree        -- species tree; only ``stree.root.name`` is read here,
                      the rest is forwarded to phylo.find_branch_distrib.
      gene2species -- forwarded unchanged to phylo.find_branch_distrib.
      statsprefix  -- when non-empty, branch-length statistics are written
                      to ``<statsprefix>.lens`` and
                      ``<statsprefix>_rates.tab``.
      filenames    -- forwarded to treeDistrib2table.

    Returns a dict mapping species-node names to the parameters returned by
    fitNormal2, plus:
      "baserate"      -- [mu*mu/sigma2, mu/sigma2] computed from the mean and
                         variance of the filtered total tree lengths,
      stree.root.name -- the fixed pair [0, 1].

    NOTE(review): if every retained tree has the same total length,
    ``sigma2`` is 0 and the base-rate computation raises ZeroDivisionError.
    """
    util.tic("learn model")

    util.tic("find branch length distributions")
    lengths, used = phylo.find_branch_distrib(trees, stree, gene2species, False)
    debug("Total trees matching species topology: %d out of %d" %
          (sum(used), len(trees)))
    util.toc()

    params = {}

    # Per-tree total: sum the i-th entry of every branch's length list.
    # Built as a real list so it can be reused for every branch below.
    totlens = [sum(column) for column in zip(*lengths.values())]

    # print output stats
    if statsprefix != "":
        # Close the output deterministically instead of relying on garbage
        # collection of an anonymous file object.
        with open(statsprefix + ".lens", "w") as out:
            writeTreeDistrib(out, lengths)
        rates = treeDistrib2table(lengths, filenames=filenames)
        rates.write(statsprefix + "_rates.tab")

    util.tic("fitting params")
    for node, lens in lengths.items():
        # Nothing to fit for empty or constant length lists.
        if len(lens) == 0 or max(lens) == min(lens):
            continue

        util.tic("fitting params for " + str(node.name))
        # Fit on lengths divided element-wise by the per-tree totals.
        param = fitNormal2(util.vdiv(lens, totlens))

        params[node.name] = param
        util.toc()
    util.toc()

    # calc distribution of total tree length
    trees2 = util.mget(trees, util.findeq(True, used))
    lens = [sum(y.dist for y in x.nodes.values()) for x in trees2]
    # Two-pass outlier trimming: an absolute cap first, then anything at or
    # above twice the mean of what survived the cap.
    lens = [x for x in lens if x < 20]
    mu = stats.mean(lens)
    lens = [x for x in lens if x < 2*mu]
    mu = stats.mean(lens)
    sigma2 = stats.variance(lens)
    params["baserate"] = [mu*mu/sigma2, mu/sigma2]
    params[stree.root.name] = [0, 1]

    util.toc()

    return params
Beispiel #2
0
def fitNormal2(lens):
    """Fit a normal density to *lens* and return the fitted parameters.

    The sample mean and standard deviation seed stats.fitDistrib, which is
    given the window [mu - 2*sigma, mu + 2*sigma] and a final argument of
    sigma divided by ``min(30, len(lens) // 5)`` (presumably a bin/step
    width -- confirm against stats.fitDistrib).

    The divisor is clamped to at least 1: with fewer than five samples the
    floor division yields 0 and the step computation would otherwise raise
    ZeroDivisionError.

    Returns only the parameter list; the residual reported by
    stats.fitDistrib is discarded.
    """
    mu = stats.mean(lens)
    sigma = stats.sdev(lens)
    # Explicit floor division keeps the historical integer semantics on
    # every interpreter version.
    divisions = max(1, min(30, len(lens) // 5))
    param, resid = stats.fitDistrib(stats.normalPdf,
                                    [mu, sigma],
                                    lens,
                                    mu - 2*sigma,
                                    mu + 2*sigma,
                                    sigma / divisions)
    return param
def layout_arg(arg, leaves=None, yfunc=lambda x: x):
    """Compute [x, y] coordinates for every node of an ARG.

    Leaf x-positions come from layout_arg_leaves(arg) when *leaves* is None,
    otherwise from util.list2lookup(leaves).  An internal node's x is the
    mean of its children's x, and every node's y is yfunc(node.age).

    Returns a dict mapping node -> [x, y].
    """
    # layout leaves
    leafx = (layout_arg_leaves(arg) if leaves is None
             else util.list2lookup(leaves))

    coords = {}
    # Postorder guarantees children are placed before their parent.
    for node in arg.postorder():
        if node.is_leaf():
            xpos = leafx[node]
        else:
            xpos = stats.mean(coords[kid][0] for kid in node.children)
        coords[node] = [xpos, yfunc(node.age)]

    return coords
Beispiel #4
0
def layout_arg(arg, leaves=None, yfunc=lambda x: x):
    """Assign an [x, y] position to each node of an ARG.

    x: leaves take their value from the leaf lookup (layout_arg_leaves(arg)
    if *leaves* is None, else util.list2lookup(leaves)); internal nodes take
    the mean x of their children.  y: yfunc applied to the node's age.

    Returns a dict keyed by node with [x, y] list values.
    """
    # layout leaves
    if leaves is not None:
        leafx = util.list2lookup(leaves)
    else:
        leafx = layout_arg_leaves(arg)

    positions = {}
    # Children precede parents in postorder, so their x is already known.
    for node in arg.postorder():
        if not node.is_leaf():
            xval = stats.mean(positions[c][0] for c in node.children)
        else:
            xval = leafx[node]
        positions[node] = [xval, yfunc(node.age)]

    return positions
def draw_tree(tree,
              brecon,
              stree,
              xscale=100,
              yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None,
              autoclose=True,
              rmargin=10,
              lmargin=100,
              tmargin=100,
              bmargin=100,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.3, .7, .3),
              snode_color=(.2, .2, .7),
              loss_color=(1, 1, 1),
              loss_color_border=(.5, .5, .5),
              dup_color=(0, 0, 1),
              dup_color_border=(0, 0, 1),
              trans_color=(1, 1, 0),
              trans_color_border=(.5, .5, 0),
              gtrans_color=(1, 0, 0),
              gtrans_color_border=(.5, 0, 0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"):
    '''Draw a parasite tree reconciled onto a host tree as an SVG image.

    Arguments:
      tree    -- the parasite tree; traversed in pre- and postorder, and its
                 nodes are the keys of brecon.
      brecon  -- mapping from each tree node to a list of
                 (snode, event, frequency) triples.  event is compared with
                 "spec", "gene", "loss", "dup", "trans" and "gtrans".
                 The per-node lists are replaced in place by the result of
                 orderLoss while the layout is computed.
      stree   -- the host tree, laid out with treelib1.layout_tree.
      xscale, yscale -- scales handed to the host-tree layout; yscale also
                 scales the vertical spread of events inside a host branch.
      canvas  -- an existing canvas to draw on; when None a new svg.Svg
                 writing to filename is created and sized from the layout
                 plus the four margins.
      autoclose -- when true, the style and SVG elements are closed before
                 returning.
      *_color, *_color_border -- fill/stroke colors for the host tree, the
                 parasite tree edges and each event type.
      event_size -- side length of the square drawn for each event.
      snames  -- mapping from host leaf name to display label; defaults to
                 the identity over stree.leaf_names().
      rootlen -- length of the stub left of the host root; defaults to a
                 tenth of the largest x in the host layout.
      stree_width -- fraction of yscale used as the drawable width of a
                 host branch.

    label_offset is accepted but not used by this function.

    Returns the canvas that was drawn on.
    '''
    # set defaults
    font_ratio = 8. / 11.
    if label_size is None:
        label_size = .7 * font_size

    # NOTE(review): legend_scale and minlen are assigned here but never read
    # by the live code below (only by the disabled block further down).
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout
    # The key None stands for "parent of the host root"; it is inserted
    # before the shift below, so it ends up at the root's original x.
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    # Shift every host position right by rootlen and up by half a yscale.
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    # ylists[snode]: parasite nodes placed at host node snode, in order.
    # yorders[node]: one y-rank per event of node, parallel to brecon[node].
    ylists = defaultdict(lambda: [])
    yorders = {}
    # layout speciations and genes (y)
    for node in tree.preorder():
        # Remember the name of the first preorder node so the "Root Node"
        # marker can be drawn on it later.
        if node == list(tree.preorder())[0]:
            rootNode = node.name
        yorders[node] = []
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                yorders[node].append(len(ylists[snode]))
                ylists[snode].append(node)

    # layout dups and transfers (y)
    for node in tree.postorder():

        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # Find number of nodes on a single branch for y-coord
                # Only children whose last event maps to the same host node
                # contribute; the event takes the mean of their ranks.
                v = [
                    yorders[child] for child in node.children
                    if brecon[child][-1][0] == snode
                ]
                if len(v) == 0:
                    yorders[node].append(0)
                else:
                    yorders[node].append(stats.mean(flatten(v)))

    # layout node (x)
    xorders = {
    }  #Dictionary to record number of nodes on a single branch for x-coord
    branchFrac = {}  #Dictionary to record the placement of a node on a branch
    for node in tree.postorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen at host vertices
                if not node in branchFrac:
                    branchFrac[node] = 0
            else:  # Transfers and duplications occur on branches
                # One step further along the branch than the deepest child.
                v = [branchFrac[child] for child in node.children]
                if len(v) == 0:
                    branchFrac[node] = 1
                else:
                    branchFrac[node] = max(v) + 1

    for node in tree.preorder():
        xorders[node] = []
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen on vertices, not branches
                xorders[node].append(0)
            else:
                if node.parent and containsTransOrDup(node.parent, brecon):
                    # set branchFrac to the branch Frac of the parent, they are
                    # on the same branch
                    branchFrac[node] = branchFrac[node.parent]
                if containsLoss(node, brecon):
                    # if following a loss, first transfer/duplication event on branch
                    xorders[node].append(1)
                elif not node.parent:  # Root of tree
                    xorders[node].append(0)
                else:
                    xorders[node].append(maxList(xorders[node.parent]) + 1)

    # setup layout
    # layout[node] is a list of (x, y) points, one per event of node.  The
    # None entry is the position of the host parent of the root's last
    # event and acts as the "parent point" of the tree root.
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree.preorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            nx, ny = slayout[snode]
            px, py = slayout[snode.parent]
            (npx, npy) = layout[node.parent][-1]
            # set spacing between nodes on the same branch
            # Start at 50 and shrink in steps of 5 until branchFrac steps
            # fit within the host branch's horizontal extent.
            frac = 50
            while branchFrac[node] * frac >= nx - px:
                frac = frac - 5

            # calc x
            if event == "trans" or event == "gtrans":
                if npx > px:  # transfer parent is farther forward in time than host parent
                    x = npx + frac
                else:
                    x = px + frac
            elif event == "dup":
                x = px + frac
            else:
                x = nx
            # calc y

            # Interpolate along the host branch from (px, py) to (nx, ny)
            # at the chosen x.
            deltay = ny - py
            slope = deltay / float(nx - px)

            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2
            # frac is reused here as the event's relative rank (strictly
            # between 0 and 1) among the nodes placed at snode.
            frac = (yorders[node][n] +
                    1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout: layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]

        # order brecon nodes temporally
        brecon[node] = orderLoss(node, brecon, layout)
        # order layout nodes temporally
        layout[node] = orderLayout(node, layout)

        # Debug dump when the last computed y falls more than 50 units below
        # the host layout.
        # NOTE(review): y, nx, offset, etc. come from the final iteration of
        # the inner loop; if brecon[node] is empty they are stale values from
        # a previous node.
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    # Estimated pixel width of the longest leaf label of each tree.
    max_label_size = max(len(x.name)
                         for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size
    # The string literal below is disabled legacy code kept as a bare
    # expression; it has no effect at runtime.
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)
    
    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None
    
    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''

    # Overall drawing extent, derived from the host layout plus label room.
    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        # autoclose defaults to True in the signature, so these None checks
        # only apply when the caller passes autoclose=None explicitly.
        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    # Everything below is drawn offset by the left and top margins.
    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas,
               stree,
               slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2.,
                        stree_font_size,
                        fillColor=snode_color)

    # draw tree

    for node in tree:

        containsL = containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            x, y = layout[node][n]

            # Pick the point this event's edge starts from: the parent's
            # last event, or the previous event of the same node when the
            # node carries a loss.
            if containsL == False:  # no loss event
                px, py = layout[node.parent][-1]
            else:  # loss event present
                if n == 0:  # event is loss
                    px, py = layout[node.parent][-1]
                else:  # event stems from loss
                    px, py = layout[node][n - 1]

            trans = False

            # The root has no parent, so no incoming edge is drawn for it.
            if node.parent:
                snode, event, frequency = brecon[node][n]
                if n == 0:
                    psnode, pevent, pfrequency = brecon[node.parent][-1]

                # Event stemming from a loss event
                else:
                    psnode, pevent, pfrequency = brecon[node][n - 1]
                # An edge counts as a transfer only when the preceding event
                # is a transfer AND the host node changes across the edge.
                if pevent == "trans" or pevent == "gtrans":
                    if psnode != snode:
                        trans = True
                else:
                    trans = False

                if not trans:
                    canvas.line(x, y, px, py, color=tree_color)

                # draw the transfer dashed line
                else:
                    # Both Bezier control points sit at the edge midpoint
                    # pulled left by `arch`.
                    arch = 20
                    x2 = (x * .5 + px * .5) - arch
                    y2 = (y * .5 + py * .5)
                    x3 = (x * .5 + px * .5) - arch
                    y3 = (y * .5 + py * .5)
                    # draw regular transfer dashed line
                    if pevent == "trans":
                        canvas.write(
                            "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                            (x, y, x2, y2, x3, y3, px, py,
                             " style='stroke-dasharray: 4, 2' " +
                             svg.colorFields(tree_trans_color, (0, 0, 0, 0))))
                    # draw guilty transfer dashed line
                    else:
                        canvas.write(
                            "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                            (x, y, x2, y2, x3, y3, px, py,
                             " style='stroke-dasharray: 4, 2' " +
                             svg.colorFields(gtrans_color, (0, 0, 0, 0))))

    # draw events
    for node in tree:
        # Arrow-shaped "Root Node" marker pointing at the root's first event.
        if node.name == rootNode:
            x, y = layout[node][0]
            canvas.polygon((x-20, y, x-50, y+30,x-50, y+15, x-90, y+15, x-90,\
             y-15, x-50, y-15, x-50, y-30), strokeColor = (1,.7,.3), \
             fillColor = (1,.7,.3))

            canvas.text("Root Node", x-88, y+5, font_size+2,\
                fillColor = (0,0,0))
        # One square per event, centred on the event's point, labelled with
        # the frequency (three decimals) immediately followed by the node
        # name.  "gene" events get no square here.
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            frequency = float(frequency)
            x, y = layout[node][n]
            o = event_size / 2.0
            if event == "loss":  # draw boxes, frequencies of loss events
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o,
                            y - o,
                            font_size + 2,
                            fillColor=loss_color)

            if event == "spec":  # draw boxes, frequencies of speciation events
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=(0, 0, 0),
                            strokeColor=(0, 0, 0))
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o,
                            y - o,
                            font_size + 2,
                            fillColor=(0, 0, 0))

            if event == "dup":  # draw boxes, frequencies of duplication events
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o,
                            y - o,
                            font_size + 2,
                            fillColor=dup_color)

            elif event == "trans":  # draw boxes, frequencies of transfer events
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o,
                            y - o,
                            font_size + 2,
                            fillColor=trans_color)

            elif event == "gtrans":  # draw boxes, frequencies of guilty transfer events
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=gtrans_color,
                            strokeColor=gtrans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o,
                            y - o,
                            font_size + 2,
                            fillColor=gtrans_color)

    # draw tree leaves
    # Leaf names are written next to the leaf's "gene" event only.
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and brecon[node][n][1] == "gene":
                canvas.text(node.name,
                            x + leaf_padding,
                            y + font_size / 2.,
                            font_size + 2,
                            fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def _accumulate_family(line, hList, tally):
    """Fold one probabilities-file line into the running statistics.

    *line* holds a family label followed by whitespace-separated
    "duploss,hemiplasy" pairs.  Every hemiplasy value is appended to *hList*
    and tally[0] / tally[1] count the pairs where hemiplasy is / is not
    strictly greater than duploss.  Both accumulators are mutated in place
    and are deliberately NOT reset here, so the returned figures are running
    values over everything the caller has fed in so far.

    Returns (famid, percent, ave): the label with its first six characters
    stripped, tally[0] / (tally[0] + tally[1]), and stats.mean(hList).
    """
    fields = line.split()
    famid = fields.pop(0)[6:]

    # get the probability of duploss and hemiplasy for each trial
    for pair in fields:
        duploss, hemiplasy = map(float, pair.split(','))
        hList.append(hemiplasy)
        # check whether hemiplasy is more likely or duploss
        if hemiplasy > duploss:
            tally[0] += 1
        else:
            tally[1] += 1

    percent = float(tally[0]) / float(tally[0] + tally[1])
    ave = stats.mean(hList)
    return famid, percent, ave


def _two_leaf_children(stree):
    """Return (species1, species2): first/second child names of every node
    of *stree* that has exactly two leaves below it."""
    species1 = []
    species2 = []
    for node in stree:
        if len(node.leaves()) == 2:
            species1.append(node.children[0].name)
            species2.append(node.children[1].name)
    return species1, species2


def boxPlot(x_axis = 'initialFreq',
            dataPath = '/home/muddcs15/research/work/hemiplasy/results/',
            prob1 = '0.001',
            prob2 = '0.01',
            prob3 = '0.05',
            prob4 = '0.1',
            prob5 = '0.5'):
    """
    Show boxplots of the probability of hemiplasy.

    x_axis selects the plot:
      'initialFreq' -- two panels (average probability of hemiplasy, and
                       fraction of trials where hemiplasy beats duploss), one
                       box per initial frequency prob1..prob5.
      'pairs'       -- one box per species pair (up to five), pooling the
                       files for prob1..prob4.
      'dupLocation' -- one panel per species pair, one box per duplication
                       location 1..13, pooling the files for prob1..prob4.
    Any other value does nothing.

    dataPath is the directory holding 'probabilities-<prob>.txt'.  The event
    files are read from fixed absolute paths, independent of dataPath.

    The hemiplasy/duploss counters run across all files, and the averaged
    hemiplasy list runs across all families of a file, so each reported value
    is cumulative up to that family.
    NOTE(review): confirm that this cumulative behaviour is intended.

    NOTE(review): the 'pairs' and 'dupLocation' modes read the free name
    `spectree`, which must exist at module level.  They also reuse the
    species-pair index (and, in 'dupLocation', the duplication field) from a
    previous family when the event scan finds no match, and fail with
    UnboundLocalError if that happens for the very first family.

    Returns None; the figure is displayed with plt.show().
    """
    lossPath = '/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt'

    if x_axis == 'initialFreq':
        # The parsed fields are not used in this mode; the read is kept
        # because it fails fast on a missing or malformed file.
        with open('/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.error.txt', 'r') as events:
            for line in events:
                famid, locus, spcs, gns, snode, lca = line.rstrip().split('\t')

        # define number of plots to be outputed
        fig, axes = plt.subplots(nrows=1, ncols=2)

        # identify the files for each of the different initial frequencies
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in (prob1, prob2, prob3, prob4, prob5)]

        totalPerList = []   # probability of hemiplasy compared to duploss
        totalAList = []     # probability of hemiplasy ocurring

        # [trials favouring hemiplasy, trials favouring duploss]
        tally = [0, 0]

        # open each probability file
        for probFilename in probsList:
            hList = []      # list of probability of hemiplasy
            perList = []    # list of percentage with prob hemiplasy > prob duploss
            aveList = []    # list of average probability of hemiplasy per fam id

            # look at each famid for that initial frequency
            with open(probFilename, "r") as probFile:
                for line in probFile:
                    famid, percent, ave = _accumulate_family(line, hList, tally)
                    perList.append(percent)
                    aveList.append(ave)

            totalPerList.append(perList)
            totalAList.append(aveList)

        # define the first plot and its labels
        axes[0].boxplot(totalAList)
        axes[0].set_title('Probability of Hemiplasy')
        axes[0].set_xticklabels(['0.001','0.01','0.05','0.1','0.5'],minor=False)
        axes[0].set_xlabel('Initial Frequency')
        axes[0].set_ylabel('Probability')

        # define the second plot and its labels
        axes[1].boxplot(totalPerList)
        axes[1].set_title('Probability of Hemiplasy vs. DupLoss')
        axes[1].set_xticklabels(['0.001','0.01','0.05','0.1','0.5'],minor=False)
        axes[1].set_xlabel('Initial Frequency')
        axes[1].set_ylabel('Probability')

        # print the plots
        plt.show()

    if x_axis == 'pairs':
        stree = treelib.read_tree(spectree) # species tree
        species1, species2 = _two_leaf_children(stree)

        # identify the files for each of the different initial frequencies
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in (prob1, prob2, prob3, prob4)]

        tally = [0, 0]

        # one list of average probabilities per species pair
        totalPairs = [[], [], [], [], []]

        # open each probability file
        for probFilename in probsList:
            hList = []      # list of probability of hemiplasy

            with open(lossPath, 'r') as events:
                with open(probFilename, "r") as probFile:
                    # look at each famid for that initial frequency
                    for line in probFile:
                        famid, percent, ave = _accumulate_family(line, hList, tally)

                        # Scan forward in the events file (never rewinding)
                        # for this family's record.
                        for evline in events:
                            ev_famid, locus, spcs, gns, snode, lca = evline.rstrip().split('\t')
                            if famid == ev_famid:
                                # Pairs where exactly one member occurs in
                                # spcs; the last such pair wins.
                                for sp1, sp2 in zip(species1, species2):
                                    if (sp1 in spcs and sp2 not in spcs):
                                        specPos = species1.index(sp1)
                                    elif (sp2 in spcs and sp1 not in spcs):
                                        specPos = species2.index(sp2)
                                break

                        if 0 <= specPos < len(totalPairs):
                            totalPairs[specPos].append(ave)

        plt.boxplot(totalPairs)
        plt.title('Hemiplasy by Pairs')
        plt.xlabel('Pair')
        plt.ylabel('Probability')

        # print the plots
        plt.show()

    if x_axis == 'dupLocation':
        # Bound to `stree` because that is the name the rest of this branch
        # reads.
        stree = treelib.read_tree(spectree) # species tree
        species1, species2 = _two_leaf_children(stree)

        # define number of plots to be outputed
        fig, axes = plt.subplots(nrows=2, ncols=3)

        # identify the files for each of the different initial frequencies
        probsList = [os.path.join(dataPath, 'probabilities-' + p + '.txt')
                     for p in (prob1, prob2, prob3, prob4)]

        tally = [0, 0]

        # one list of (duplication location, average probability) per pair
        pairList = [[], [], [], [], []]

        # open each probability file
        for probFilename in probsList:
            hList = []      # list of probability of hemiplasy
            aveList = []    # list of average probability of hemiplasy per fam id
            PDList = []     # (pair index, duplication field) per family

            with open(lossPath, 'r') as events:
                with open(probFilename, "r") as probFile:
                    # look at each famid for that initial frequency
                    for line in probFile:
                        famid, percent, ave = _accumulate_family(line, hList, tally)
                        aveList.append(ave)

                        # Scan forward in the events file for this family.
                        for evline in events:
                            ev_famid, locus, spcs, gns, dup, lca = evline.rstrip().split('\t')
                            if famid == ev_famid:
                                for sp1, sp2 in zip(species1, species2):
                                    if (sp1 in spcs and sp2 not in spcs):
                                        specPos = species1.index(sp1)
                                    elif (sp2 in spcs and sp1 not in spcs):
                                        specPos = species2.index(sp2)
                                break

                        PDList.append((specPos, dup))

            # Route each family's average to its pair, tagged with the
            # duplication location.
            for famNum, (pos, dpl) in enumerate(PDList):
                if 0 <= pos < len(pairList):
                    pairList[pos].append((int(dpl), aveList[famNum]))

        # Regroup each pair's (location, probability) tuples into 13 lists,
        # one per duplication location 1..13, ready for boxplot().
        finalPair = collections.defaultdict(list)
        for pairCount, pairNum in enumerate(pairList, 1):
            byLocation = collections.defaultdict(list)
            for (dupLoc, prob) in pairNum:
                byLocation[dupLoc].append(prob)
            finalPair[pairCount].extend(
                [byLocation[dupLoc] for dupLoc in range(1, 14)])

        # one panel per pair; the sixth grid cell stays empty
        panels = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1)]
        for pairCount, panel in enumerate(panels, 1):
            ax = axes[panel]
            ax.boxplot(finalPair[pairCount])
            ax.set_title('Pair%d' % pairCount)
            ax.set_xlabel('Duplication Location')
            ax.set_ylabel('Probability')
            ax.set_ylim(0,0.25)

        # print the plots
        plt.show()
Beispiel #7
0
def midroot_recon(tree, stree, recon, events, params, generate):
    """Reposition the root of ``tree`` along its root branch, in place.

    For every leaf below each of the two root children, the observed path
    length (sum of ``dist`` values, divided by ``generate``) is compared
    with the expected length (sum of ``params[snode.name][0]`` along the
    reconciled species path).  The means of those differences on each side
    pick a split point on the root branch, clamped to ``[0, totdist]``.

    Arguments:
      tree     -- gene tree whose root has exactly two children
      stree    -- species tree (not referenced by this function)
      recon    -- mapping from gene node to species node
      events   -- mapping from gene node to an event label; only
                  ``events[tree.root] == "spec"`` is tested
      params   -- mapping from species node name to a sequence whose first
                  element is used as the expected branch length
      generate -- divisor applied to observed lengths

    Returns None; ``dist`` of both root children is overwritten.

    NOTE(review): ``node1.dist`` / ``node2.dist`` enter the left/right
    estimates unscaled while ``totdist`` and the differences are divided by
    ``generate`` -- confirm this is intended when ``generate != 1``.
    """
    child1, child2 = tree.root.children

    def path_lengths(gene_node, gene_stop):
        # expected length: walk up the species tree between the two
        # reconciled species nodes
        expected = 0
        observed = 0
        snode = recon[gene_node]
        stop_snode = recon[gene_stop]
        while snode != stop_snode:
            expected += params[snode.name][0]
            snode = snode.parent

        # observed length: walk up the gene tree itself
        while gene_node != gene_stop:
            observed += gene_node.dist
            gene_node = gene_node.parent

        return expected, observed / generate

    def residuals(child, leaves):
        # observed minus expected length for every leaf below `child`
        out = []
        for leaf in leaves:
            # measure up to the root when the root is a speciation,
            # otherwise only up to the root child
            if events[tree.root] == "spec":
                target = tree.root
            else:
                target = child
            expected, observed = path_lengths(leaf, target)
            out.append(observed - expected)
        return out

    leaves1 = child1.leaves()
    leaves2 = child2.leaves()
    diffs1 = residuals(child1, leaves1)
    diffs2 = residuals(child2, leaves2)

    totdist = (child1.dist + child2.dist) / generate

    # two independent estimates of where the root sits on the root branch
    left = child1.dist - stats.mean(diffs1)
    right = totdist - child2.dist + stats.mean(diffs2)

    mid = util.clamp((left + right) / 2.0, 0, totdist)

    child1.dist = mid * generate
    child2.dist = (totdist - mid) * generate
def boxPlot(dataPath = '/home/muddcs15/research/work/hemiplasy/results/',
            prob1 = '0.001',
            prob2 = '0.05',
            prob3 = '0.1',
            prob4 = '0.5'):
    """
    Show box plots of hemiplasy statistics against initial allele frequency.

    For each of the four initial frequencies the file
    ``<dataPath>/probabilities-<prob>.txt`` is read.  Every non-blank line
    holds a family label followed by whitespace-separated
    ``duploss,hemiplasy`` probability pairs, one pair per trial.

    Two values are computed per family (per line):
      * the mean hemiplasy probability over that family's trials, and
      * the fraction of that family's trials where hemiplasy is more
        probable than duploss.

    The left panel shows the per-family means, the right panel the
    per-family fractions, one box per initial frequency.

    Arguments:
      dataPath        -- directory containing the probability files
      prob1 .. prob4  -- initial frequencies as strings; they select the
                         file names and label the x axis

    Returns None; the figure is displayed with ``plt.show()``.
    """
    freqs = [prob1, prob2, prob3, prob4]

    # one row, two panels
    fig, axes = plt.subplots(nrows=1, ncols=2)

    totalPerList = []   # per frequency: fraction of trials with hemiplasy > duploss, per family
    totalAList = []     # per frequency: mean hemiplasy probability, per family

    for freq in freqs:
        probFilename = os.path.join(dataPath, 'probabilities-' + freq + '.txt')

        perList = []
        aveList = []

        # 'with' closes the file even if a line fails to parse
        with open(probFilename, "r") as probFile:
            for line in probFile:
                sepProbs = line.split()
                trials = sepProbs[1:]   # sepProbs[0] is the family label
                if not trials:
                    # blank line or family without trials: nothing to average
                    continue

                # statistics are reset for every family so that one family's
                # trials do not leak into the next family or the next file
                hList = []
                h = 0
                for pair in trials:
                    duploss, hemiplasy = map(float, pair.split(','))
                    hList.append(hemiplasy)
                    if hemiplasy > duploss:
                        h += 1

                perList.append(float(h) / len(hList))
                aveList.append(stats.mean(hList))

        totalPerList.append(perList)
        totalAList.append(aveList)

    # left panel: mean hemiplasy probability
    axes[0].boxplot(totalAList)
    axes[0].set_title('Probability of Hemiplasy')
    # label the boxes with the frequencies actually requested
    axes[0].set_xticklabels(freqs, minor=False)
    axes[0].set_xlabel('Initial Frequency')
    axes[0].set_ylabel('Probability')

    # right panel: fraction of trials favouring hemiplasy over duploss
    axes[1].boxplot(totalPerList)
    axes[1].set_title('Probability of Hemiplasy vs. DupLoss')
    axes[1].set_xticklabels(freqs, minor=False)
    axes[1].set_xlabel('Initial Frequency')
    axes[1].set_ylabel('Probability')

    # display the figure
    plt.show()
def draw_tree(tree, brecon, stree,
              xscale=100, yscale=100,
              leaf_padding=10, 
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=100, tmargin=100, bmargin=100,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.3, .7, .3),
              snode_color=(.2, .2, .7),
              loss_color = (1,1,1),
              loss_color_border=(.5,.5,.5),
              dup_color=(0, 0, 1),
              dup_color_border=(0, 0, 1),
              trans_color=(1, 1, 0),
              trans_color_border=(.5, .5, 0),
              gtrans_color=(1,0,0),
              gtrans_color_border=(.5,0,0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"
              ):
    '''Draw the reconciliation of a parasite tree on a host tree as SVG.

    Takes as input a parasite tree, tree, a reconciliation, brecon, and a
    host tree, stree, as well as sizes and colors of the tree components,
    and draws the parasite tree embedded in the host tree with event nodes
    in the specified colors.

    Arguments (as used by the code below):
      tree     -- parasite tree; preorder(), postorder(), leaves(), nodes,
                  root and iteration over nodes are used
      brecon   -- mapping from each node of tree to a list of
                  (snode, event, frequency) triples; the event strings
                  tested are "spec", "gene", "loss", "dup", "trans" and
                  "gtrans".  brecon[node] is OVERWRITTEN with the result
                  of orderLoss() for every node.
      stree    -- host tree, laid out with treelib1.layout_tree()
      xscale, yscale -- passed to the host-tree layout; yscale also scales
                  the vertical spread of nodes inside a host branch
      leaf_padding -- horizontal gap before leaf labels
      label_size, label_offset -- accepted but not read after defaulting
      font_size, stree_font_size -- text sizes for parasite / host labels
      canvas   -- existing canvas to draw on; if None an svg.Svg writing
                  to filename is created
      autoclose -- if true, the style and svg elements are closed at the end
      rmargin, lmargin, tmargin, bmargin -- margins; lmargin/tmargin are
                  also the translation applied to everything drawn
      *_color, *_color_border -- fill / stroke colors for edges and events
      event_size -- side length of the event squares
      snames   -- mapping host leaf name -> displayed label (defaults to
                  the identity mapping)
      rootlen  -- extra length drawn before the host root (defaults to
                  10% of the largest host x coordinate)
      stree_width -- fraction of yscale used to spread nodes in a branch
      filename -- output file used when canvas is None

    Returns the canvas.
    '''
    # set defaults
    font_ratio = 8. / 11.    
    # NOTE: label_size is not read again anywhere in this function
    if label_size is None:
        label_size = .7 * font_size


    # NOTE: legend_scale and minlen are assigned here but never read below
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    # default host leaf labels: each leaf name maps to itself
    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout
    # add a virtual parent of the host root under the key None, then shift
    # every entry (including None) right by rootlen and up by half a yscale
    x, y = slayout[stree.root]
    slayout[None] =  (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y  - .5 * yscale)

    # layout tree
    # ylists[snode]: parasite nodes with a spec/gene/loss event on snode,
    #                in the order they were encountered
    # yorders[node]: list of vertical order values, one appended per event
    ylists = defaultdict(lambda: [])
    yorders = {}
    # layout speciations and genes (y)
    for node in tree.preorder():
        # remember the name of the first node in preorder as the root;
        # NOTE(review): the preorder list is rebuilt for every node
        if node == list(tree.preorder())[0]:
            rootNode = node.name
        yorders[node] = []
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                # order value = position of this node within ylists[snode]
                yorders[node].append(len(ylists[snode]))
                ylists[snode].append(node)


    # layout dups and transfers (y)
    # NOTE(review): these values are appended after the spec/gene/loss
    # values from the pass above, so yorders[node][n] follows append order,
    # not necessarily the order of brecon[node] -- confirm
    for node in tree.postorder():

        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # Find number of nodes on a single branch for y-coord
                # (only children whose last event is on the same host node)
                v = [yorders[child]
                    for child in node.children
                    if brecon[child][-1][0] == snode]
                if len(v) == 0:
                    yorders[node].append(0)
                else:
                    # average of all order values of those children
                    yorders[node].append(stats.mean(flatten(v)))

    # layout node (x)
    xorders = {} #Dictionary to record number of nodes on a single branch for x-coord
    branchFrac = {} #Dictionary to record the placement of a node on a branch
    for node in tree.postorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen at host vertices
                if not node in branchFrac:
                    branchFrac[node] = 0
            else: # Transfers and duplications occur on branches
                # one more than the largest value among the children
                # (1 when the node has no children)
                v = [branchFrac[child] for child in node.children]
                if len(v) == 0:
                    branchFrac[node] = 1
                else:
                    branchFrac[node] = max(v) + 1

    # NOTE: xorders is only read inside this loop (via xorders[node.parent]);
    # the coordinate computation below uses branchFrac instead
    for node in tree.preorder():
        xorders[node] = []
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen on vertices, not branches
                    xorders[node].append(0)
            else:
                if node.parent and containsTransOrDup(node.parent, brecon):
                    # set branchFrac to the branch Frac of the parent, they are
                    # on the same branch
                    branchFrac[node] = branchFrac[node.parent]
                if containsLoss(node, brecon):
                    # if following a loss, first transfer/duplication event on branch
                    xorders[node].append(1)
                elif not node.parent: # Root of tree
                    xorders[node].append(0)
                else:
                    xorders[node].append(maxList(xorders[node.parent])+1)
              
    # setup layout
    # layout[node] is a list of (x, y) points, one per event of the node;
    # layout[None] holds the position of the host node above the host node
    # of the root's last event
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree.preorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            nx, ny = slayout[snode]          # host node position
            px, py = slayout[snode.parent]   # host parent position
            # last laid-out point of the parasite parent; relies on the
            # parent already having an entry in layout
            (npx, npy) = layout[node.parent][-1]
            # set spacing between nodes on the same branch
            # start at 50 and shrink in steps of 5 until branchFrac * frac
            # is smaller than the horizontal length of the host branch.
            # NOTE(review): if branchFrac[node] == 0 and nx - px <= 0 the
            # condition never becomes false -- confirm this cannot occur
            frac = 50
            while branchFrac[node] * frac >= nx - px:
                frac = frac - 5


            # calc x
            if event == "trans" or event == "gtrans":
                if npx > px: # transfer parent is farther forward in time than host parent
                    x = npx + frac
                else: x = px + frac
            elif event =="dup":
                x = px + frac
            else: x = nx
            # calc y

            # interpolate along the straight line from host parent to host
            # node; NOTE(review): divides by (nx - px), which raises
            # ZeroDivisionError if both share the same x
            deltay = ny - py
            slope = deltay / float(nx-px)

            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2
            # frac is reused here as the vertical fraction inside the branch
            frac = (yorders[node][n] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset  + (frac - .5) * stree_width * yscale

            if node in layout: layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]
        
        # order brecon nodes temporally
        # (this replaces the caller's brecon[node] entry)
        brecon[node] = orderLoss(node, brecon, layout)
        # order layout nodes temporally
        layout[node] = orderLayout(node, layout)

        # debug output (Python 2 print statements) when the last computed y
        # lies more than 50 units beyond the largest host y coordinate
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    
    # layout label sizes
    # width estimate: longest leaf name length * font_ratio * font size
    max_label_size = max(len(x.name)
        for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(len(x.name)
        for x in stree.leaves()) * font_ratio * stree_font_size


    # The triple-quoted string below is disabled code kept as a bare string
    # expression; it has no effect at runtime.
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)
    
    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None
    
    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''
    
    # drawing extent derived from the host layout plus room for both labels
    xcoords, ycoords = zip(* slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + yscale
    
    
    # initialize canvas
    if canvas is None:
        # no canvas supplied: create one that writes to filename
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)
        
        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")
        
        # autoclose=None means "close only canvases created here"
        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    # everything below is drawn shifted by the left and top margins
    canvas.beginTransform(("translate", lmargin, tmargin))
    
    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width, 
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    # host labels are placed to the right of the widest parasite label
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name], 
                        x + leaf_padding + max_label_size,
                        y+stree_font_size/2., stree_font_size,
                        fillColor=snode_color)


    # draw tree

    for node in tree:

        containsL= containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            
            # (px, py) is the point this event connects back to
            if containsL == False: # no loss event
                px, py = layout[node.parent][-1]       
            else: # loss event present
                if n == 0: # event is loss
                    px, py = layout[node.parent][-1]
                else: # event stems from loss
                    px, py = layout[node][n-1]
            

            trans = False

            # edges are only drawn for nodes that have a parent
            if node.parent:
                snode, event, frequency =  brecon[node][n]
                # the preceding event is the parent's last event for n == 0,
                # otherwise the previous event of this node
                if n == 0:
                    psnode, pevent, pfrequency = brecon[node.parent][-1]
                
                # Event stemming from a loss event
                else: psnode, pevent, pfrequency = brecon[node][n-1]
                # the edge is a transfer edge when the preceding event is a
                # (guilty) transfer and it sits on a different host node
                if pevent == "trans" or pevent == "gtrans":
                    if psnode != snode:
                        trans = True
                else: trans = False

                if not trans:
                    canvas.line(x, y, px, py, color=tree_color)
                
                # draw the transfer dashed line        
                else:
                    # both control points are the edge midpoint moved left
                    # by arch
                    arch = 20
                    x2 = (x*.5 + px*.5) - arch
                    y2 = (y*.5 + py*.5)
                    x3 = (x*.5 + px*.5) - arch
                    y3 = (y*.5 + py*.5)
                    # draw regular transfer dashed line
                    if pevent == "trans":
                        canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                            (x, y, x2, y2,
                             x3, y3, px, py,
                            " style='stroke-dasharray: 4, 2' " +
                            svg.colorFields(tree_trans_color, (0,0,0,0))))
                    # draw guilty transfer dashed line
                    else: canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                            (x, y, x2, y2,
                             x3, y3, px, py,
                            " style='stroke-dasharray: 4, 2' " +
                            svg.colorFields(gtrans_color, (0,0,0,0))))


    # draw events
    for node in tree:
        # mark the root with a polygon to the left of its first event and
        # the caption "Root Node"
        if node.name == rootNode:
            x, y = layout[node][0]
            canvas.polygon((x-20, y, x-50, y+30,x-50, y+15, x-90, y+15, x-90,\
             y-15, x-50, y-15, x-50, y-30), strokeColor = (1,.7,.3), \
             fillColor = (1,.7,.3))

            canvas.text("Root Node", x-88, y+5, font_size+2,\
                fillColor = (0,0,0))
        for n in range(len(brecon[node])):
            snode, event, frequency =  brecon[node][n]
            frequency = float(frequency)
            x, y = layout[node][n]
            o = event_size / 2.0   # half the square size, to center it on (x, y)
            # every event gets a square plus a label made of the frequency
            # (three decimals) followed by the node name
            if event == "loss": # draw boxes, frequencies of loss events
                canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=loss_color,
                        strokeColor=loss_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o, font_size+2, fillColor = loss_color)

    
            if event == "spec": # draw boxes, frequencies of speciation events
                canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=(0,0,0),
                        strokeColor=(0,0,0))
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o, font_size+2, fillColor = (0,0,0))


            if event == "dup": # draw boxes, frequencies of duplication events
                canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=dup_color,
                        strokeColor=dup_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o, font_size+2, fillColor=dup_color)

            elif event == "trans": # draw boxes, frequencies of transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=trans_color,
                        strokeColor=trans_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o, font_size+2, fillColor=trans_color)
            
            elif event == "gtrans": # draw boxes, frequencies of guilty transfer events
                canvas.rect(x-o, y-o, event_size, event_size,
                        fillColor=gtrans_color,
                        strokeColor=gtrans_color_border)
                canvas.text("{:.3f}".format(frequency)+node.name, x-o, y-o, font_size+2, fillColor=gtrans_color)

    # draw tree leaves
    # a leaf name is written next to each "gene" event of a leaf node
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and brecon[node][n][1] == "gene":
                canvas.text(node.name, 
                        x + leaf_padding, y+font_size/2., font_size+2,
                        fillColor=(0, 0, 0))

        
    canvas.endTransform()
    
    # close the style and svg elements when requested
    if autoclose:
        canvas.endStyle()
        canvas.endSvg()
    
    return canvas
def boxPlot(dataPath = '/home/muddcs15/research/work/hemiplasy/results/',
            prob1 = '0.001',
            prob2 = '0.05',
            prob3 = '0.1',
            prob4 = '0.5',
            spectree = '/home/muddcs15/research/work/hemiplasy/data/config/fungi.stree',
            eventsFile = '/home/muddcs15/research/work/hemiplasy/results/hemiplasy-loss.txt'):
    """
    Show a box plot of mean hemiplasy probability grouped by species pair.

    A "pair" is the two children of a species-tree node that has exactly
    two leaves.  Every family is assigned to the pair for which exactly one
    of the two species names occurs in the species field of the family's
    first line in the events file (if several pairs qualify, the last one
    in species-tree iteration order is used).  The family's mean hemiplasy
    probability from each of the four probability files is then added to
    that pair's box.

    Families without an event line, without a qualifying pair, or without
    any trial are skipped.

    Arguments:
      dataPath        -- directory containing ``probabilities-<prob>.txt``
      prob1 .. prob4  -- initial frequencies (strings) selecting the files
      spectree        -- species tree file read with treelib.read_tree
      eventsFile      -- tab-separated file with six fields per line:
                         famid, locus, species, genes, snode, lca

    Returns None; the figure is displayed with ``plt.show()``.

    NOTE(review): species names are matched with ``in`` against the raw
    species field, i.e. as substrings -- confirm no species name is
    contained in another.
    """
    # the plot shows exactly five pair boxes; pairs beyond the fifth are
    # ignored
    numPairs = 5

    # collect the sibling species pairs of the species tree
    stree = treelib.read_tree(spectree)
    cherries = []
    for node in stree:
        if len(node.leaves()) == 2:
            cherries.append((node.children[0].name, node.children[1].name))

    # map each family id to its pair index (or None), using the first event
    # line of the family; reading the file once makes the lookup
    # independent of the order of the probability files
    famToPair = {}
    with open(eventsFile, 'r') as events:
        for evLine in events:
            if not evLine.strip():
                continue
            ev_famid, locus, spcs, gns, snode, lca = evLine.rstrip().split('\t')
            if ev_famid in famToPair:
                continue
            pairPos = None
            for pos, (sp1, sp2) in enumerate(cherries):
                # exactly one species of the pair is present
                if (sp1 in spcs) != (sp2 in spcs):
                    pairPos = pos
            famToPair[ev_famid] = pairPos

    totalPairs = [[] for _ in range(numPairs)]

    for prob in (prob1, prob2, prob3, prob4):
        probFilename = os.path.join(dataPath, 'probabilities-' + prob + '.txt')

        # 'with' closes the file even if a line fails to parse
        with open(probFilename, "r") as probFile:
            for line in probFile:
                sepProbs = line.split()
                trials = sepProbs[1:]
                if not trials:
                    # blank line or family without trials
                    continue
                # the family id follows a six-character prefix of the label
                famid = sepProbs[0][6:]

                # hemiplasy probabilities of this family only
                hList = []
                for pair in trials:
                    duploss, hemiplasy = map(float, pair.split(','))
                    hList.append(hemiplasy)

                pairPos = famToPair.get(famid)
                if pairPos is not None and pairPos < numPairs:
                    totalPairs[pairPos].append(stats.mean(hList))

    plt.boxplot(totalPairs)
    plt.title('Hemiplasy by Pairs')
    plt.xlabel('Pair')
    plt.ylabel('Probability')

    # display the figure
    plt.show()
Beispiel #11
0
def mleNormal(lens):
    """Return ``(mean, standard deviation)`` of ``lens``.

    Both values come from the project's ``stats`` module (``stats.mean``
    and ``stats.sdev``).
    NOTE(review): whether ``stats.sdev`` is the population or the sample
    estimate is not visible here -- confirm before relying on "MLE".
    """
    return stats.mean(lens), stats.sdev(lens)
Beispiel #12
0
def draw_tree(tree,
              brecon,
              stree,
              xscale=100,
              yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None,
              autoclose=True,
              rmargin=10,
              lmargin=10,
              tmargin=0,
              bmargin=0,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.6, .3, .8),
              snode_color=(.2, .2, .7),
              loss_color=(1, 1, 1),
              loss_color_border=(.5, .5, .5),
              dup_color=(1, 0, 0),
              dup_color_border=(.5, 0, 0),
              trans_color=(0, 1, 0),
              trans_color_border=(0, .5, 0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"):
    """Draw a tree reconciled onto a species tree as SVG and return the canvas.

    Arguments (as used by the code below):
      tree     -- tree to draw; preorder(), postorder(), leaves(), nodes,
                  root and iteration over nodes are used
      brecon   -- mapping from each node of tree to a list of
                  (snode, event, frequency) triples; the event strings
                  tested are "spec", "gene", "loss", "dup" and "trans".
                  brecon[node] is OVERWRITTEN with the result of
                  orderLoss() for every node.
      stree    -- species tree, laid out with treelib1.layout_tree()
      xscale, yscale -- passed to the species-tree layout; yscale also
                  scales the vertical spread inside a species branch
      leaf_padding -- horizontal gap before leaf labels
      label_size, label_offset -- accepted but not read after defaulting
      font_size, stree_font_size -- text sizes
      canvas   -- existing canvas to draw on; if None an svg.Svg writing
                  to filename is created
      autoclose -- if true, the style and svg elements are closed at the end
      rmargin, lmargin, tmargin, bmargin -- margins; lmargin/tmargin are
                  also the translation applied to everything drawn
      *_color, *_color_border -- fill / stroke colors
      event_size -- side length of the event squares
      snames   -- mapping species leaf name -> displayed label (defaults
                  to the identity mapping)
      rootlen  -- extra length before the species root (defaults to 10% of
                  the largest species x coordinate)
      stree_width -- fraction of yscale used to spread nodes in a branch
      filename -- output file used when canvas is None

    Besides drawing, the function prints debugging output (Python 2 print
    statements) for every node while computing the layout.
    """

    # set defaults
    font_ratio = 8. / 11.

    # NOTE: label_size is not read again anywhere in this function
    if label_size is None:
        label_size = .7 * font_size

    #if label_offset is None:
    #    label_offset = -1

    # NOTE: legend_scale and minlen are assigned here but never read below
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    # default species labels: each leaf name maps to itself
    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout
    # add a virtual parent of the species root under the key None, then
    # shift every entry (including None) right by rootlen and up by half a
    # yscale
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    # ylists[snode]: nodes with a spec/gene/loss event on snode, in the
    #                order they were encountered
    # yorders[node]: a single vertical order value per node (later events
    #                of the same node overwrite earlier ones)
    ylists = defaultdict(lambda: [])
    yorders = {}

    # layout speciations and genes (y)
    for node in tree.preorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                # order value = position of this node within ylists[snode]
                yorders[node] = len(ylists[snode])
                ylists[snode].append(node)
    # layout dups and transfers (y)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # average the order values of the children whose last event
                # is on the same species node; 0 when there are none
                v = [
                    yorders[child] for child in node.children
                    if brecon[child][-1][0] == snode
                ]
                if len(v) == 0:
                    yorders[node] = 0
                else:
                    yorders[node] = stats.mean(v)

    # layout node (x)
    # xorders[node]: 0 for spec/gene/loss events, otherwise one more than
    #                the largest value among same-branch children
    # xmax[snode]:   largest xorders value seen on that species node
    xorders = {}
    xmax = defaultdict(lambda: 0)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                xorders[node] = 0
            else:
                v = [
                    xorders[child] for child in node.children
                    if brecon[child][-1][0] == snode
                ]
                if len(v) == 0:
                    xorders[node] = 1
                else:
                    xorders[node] = max(v) + 1
            xmax[snode] = max(xmax[snode], xorders[node])

    # setup layout
    # layout[node] is a list of (x, y) points, one per event of the node;
    # layout[None] holds the position of the species node above the species
    # node of the root's last event
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree:
        for ev in brecon[node]:
            snode, event, frequency = ev
            nx, ny = slayout[snode]          # species node position
            px, py = slayout[snode.parent]   # species parent position

            # calc x
            # step back from the species node by a fraction of the branch
            # length proportional to the node's x order
            frac = (xorders[node]) / float(xmax[snode] + 1)
            deltax = nx - px
            x = nx - frac * deltax

            # calc y
            # interpolate along the straight line from species parent to
            # species node; NOTE(review): divides by deltax, which raises
            # ZeroDivisionError if both share the same x
            deltay = ny - py
            slope = deltay / float(deltax)
            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2

            # frac is reused here as the vertical fraction inside the branch
            frac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout: layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]
        # reorder the node's events and points with the helper functions;
        # this replaces the caller's brecon[node] entry
        brecon[node] = orderLoss(node, brecon, layout)
        # debug output, printed for every node
        print "Brecon = ", brecon[node]
        layout[node] = orderLayout(node, layout)
        print "Layout = ", layout[node]
        # extra debug output when the last computed y lies more than 50
        # units beyond the largest species y coordinate
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    # width estimate: longest leaf name length * font_ratio * font size
    max_label_size = max(len(x.name)
                         for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size
    # The triple-quoted string below is disabled code kept as a bare string
    # expression; it has no effect at runtime.
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)
    
    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None
    
    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''

    # drawing extent derived from the species layout plus room for labels
    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # initialize canvas
    if canvas is None:
        # no canvas supplied: create one that writes to filename
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        # autoclose=None means "close only canvases created here"
        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    # everything below is drawn shifted by the left and top margins
    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas,
               stree,
               slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    # species labels are placed to the right of the widest tree label
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2.,
                        stree_font_size,
                        fillColor=snode_color)

    # draw tree

    for node in tree:
        containsL = containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            # print brecon[node]
            x, y = layout[node][n]
            # print layout[node]
            # (px, py) is the point this event connects back to: the
            # parent's last point, or the node's previous point when the
            # node contains a loss and this event is not the loss itself
            if containsL == False:
                px, py = layout[node.parent][-1]
            else:
                if brecon[node][n][1] == "loss":
                    px, py = layout[node.parent][-1]
                else:
                    px, py = layout[node][n - 1]

            trans = False

            if node.parent:
                # after this loop snode is the species node of the node's
                # LAST event and psnode that of the parent's last event
                for ev in brecon[node]:
                    snode, event, frequency = ev
                    psnode = brecon[node.parent][-1][0]
                # walk up the species tree from snode; if psnode is never
                # reached the edge is treated as a transfer
                while snode:
                    if psnode == snode:
                        break
                    snode = snode.parent
                else:
                    trans = True

            if not trans:
                canvas.line(x, y, px, py, color=tree_color)
            else:
                # dashed curve; both control points are the edge midpoint
                # moved left by arch
                arch = 20
                x2 = (x * .5 + px * .5) - arch
                y2 = (y * .5 + py * .5)
                x3 = (x * .5 + px * .5) - arch
                y3 = (y * .5 + py * .5)

                canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                             (x, y, x2, y2, x3, y3, px, py,
                              " style='stroke-dasharray: 4, 2' " +
                              svg.colorFields(tree_trans_color, (0, 0, 0, 0))))

    # draw events
    for node in tree:
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            x, y = layout[node][n]
            o = event_size / 2.0   # half the square size, to center it on (x, y)
            # loss: square plus the frequency written in white
            if event == "loss":
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                canvas.text(frequency,
                            x - o,
                            y - o,
                            font_size,
                            fillColor=(1, 1, 1))

            # speciation: no square, only the frequency, positioned
            # relative to the species node rather than the event point
            if event == "spec":
                canvas.text(frequency,
                            slayout[snode][0] - leaf_padding / 2,
                            slayout[snode][1] - font_size,
                            font_size,
                            fillColor=(0, 0, 0))

            # duplication / transfer: square plus frequency in the event color
            if event == "dup":
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text(frequency,
                            x - o,
                            y - o,
                            font_size,
                            fillColor=dup_color)
            elif event == "trans":
                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text(frequency,
                            x - o,
                            y - o,
                            font_size,
                            fillColor=trans_color)

    # draw tree leaves
    # the name of a leaf without a loss event is written at each of its
    # layout points
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and containsLoss(node, brecon) == False:
                canvas.text(node.name,
                            x + leaf_padding,
                            y + font_size / 2.,
                            font_size,
                            fillColor=(0, 0, 0))

    canvas.endTransform()

    # close the style and svg elements when requested
    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def draw_tree(tree, brecon, stree,
              xscale=100, yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None, autoclose=True,
              rmargin=10, lmargin=10, tmargin=0, bmargin=0,
              tree_color=(0, 0, 0),
              tree_trans_color=(0, 0, 0),
              stree_color=(.4, .4, 1),
              snode_color=(.2, .2, .7),
              dup_color=(1, 0, 0),
              dup_color_border=(.5, 0, 0),
              trans_color=(0, 1, 0),
              trans_color_border=(0, .5, 0),
              event_size=10,
              snames=None,
              rootlen=None,
              stree_width=.8,
              filename="tree.svg"
              ):
    """Draw a gene tree embedded inside its species tree and return the canvas.

    The species tree is laid out with treelib.layout_tree(); every gene-tree
    node is then placed on the branch of the species node named by the LAST
    entry of its reconciliation, brecon[node][-1].

    Arguments:
    tree      -- gene tree (iterable over nodes, with preorder()/postorder())
    brecon    -- mapping: gene node -> sequence of (species node, event)
                 pairs; only the last pair of each node is used here.
                 Events handled: "spec", "gene", "dup" and "trans"
    stree     -- species tree
    xscale, yscale -- passed to treelib.layout_tree() for the species tree;
                 yscale also scales the vertical spread of gene nodes
                 inside a species branch
    leaf_padding -- horizontal gap between a leaf and its text label
    label_size, label_offset -- accepted for interface compatibility;
                 not used by this function
    font_size -- size of the gene leaf labels
    stree_font_size -- size of the species leaf labels
    canvas    -- canvas to draw on; if None, a new svg.Svg writing to
                 `filename` is created
    autoclose -- if true, the style and svg sections are ended before
                 returning.  None means "close only if the canvas was
                 created here"
    rmargin, lmargin, tmargin, bmargin -- margins added around the drawing
    tree_color -- color of ordinary gene-tree branches
    tree_trans_color -- stroke color of (dashed) transfer branches
    stree_color, snode_color -- forwarded to draw_stree(); snode_color is
                 also used for the species leaf labels
    dup_color, dup_color_border -- fill/stroke of duplication squares
    trans_color, trans_color_border -- fill/stroke of transfer squares
    event_size -- side length of the event squares
    snames    -- mapping: species leaf name -> displayed label
                 (default: each name maps to itself)
    rootlen   -- length of the stub drawn above the species root
                 (default: 10% of the largest x coordinate of the layout)
    stree_width -- fraction of yscale over which gene nodes are spread
                 within one species branch
    filename  -- output used when no canvas is given
    """

    # set defaults
    font_ratio = 8. / 11.

    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib.layout_tree(stree, xscale, yscale)

    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout: add a pseudo-parent (key None) for the root, then shift
    # every entry (including the pseudo-parent) right by rootlen and up by
    # half a row
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in list(slayout.items()):
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout speciations and genes (y): ranked by preorder position within
    # their species node
    ylists = defaultdict(list)
    yorders = {}
    for node in tree.preorder():
        snode, event = brecon[node][-1]
        if event in ("spec", "gene"):
            yorders[node] = len(ylists[snode])
            ylists[snode].append(node)

    # layout dups and transfers (y): mean rank of the children that stay in
    # the same species branch
    for node in tree.postorder():
        snode, event = brecon[node][-1]
        if event not in ("spec", "gene"):
            v = [yorders[child]
                 for child in node.children
                 if brecon[child][-1][0] == snode]
            yorders[node] = stats.mean(v) if v else 0

    # layout node (x): number of steps above the deepest same-branch child
    xorders = {}
    xmax = defaultdict(int)
    for node in tree.postorder():
        snode, event = brecon[node][-1]
        if event in ("spec", "gene"):
            xorders[node] = 0
        else:
            v = [xorders[child] for child in node.children
                 if brecon[child][-1][0] == snode]
            xorders[node] = (max(v) + 1) if v else 1
        xmax[snode] = max(xmax[snode], xorders[node])

    # setup layout
    layout = {None: slayout[brecon[tree.root][-1][0].parent]}

    # threshold for the diagnostic output below; slayout is not modified
    # inside the loop, so compute it once
    debug_ymax = max(l[1] for l in slayout.values()) + 50

    for node in tree:
        snode = brecon[node][-1][0]
        nx, ny = slayout[snode]
        px, py = slayout[snode.parent]

        # calc x: move from the species node towards its parent
        xfrac = xorders[node] / float(xmax[snode] + 1)
        deltax = nx - px
        x = nx - xfrac * deltax

        # calc y: point on the species branch at x, plus an offset by rank
        deltay = ny - py
        slope = deltay / float(deltax)
        offset = py + slope * (x - px)

        yfrac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
        y = offset + (yfrac - .5) * stree_width * yscale

        layout[node] = (x, y)

        # diagnostic output for nodes placed far below the species layout
        if y > debug_ymax:
            print("%s %s" % (nx, ny))
            print("%s %s" % (px, py))
            print("%s %s" % (offset, yfrac))
            print("%s %s" % (ylists[snode], yorders[node]))
            print("%s" % (brecon[node],))
            print("%s %s %s" % (node, snode, layout[node]))

    # layout label sizes
    max_label_size = max(len(x.name)
                         for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(len(x.name)
                          for x in stree.leaves()) * font_ratio * stree_font_size

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        if autoclose is None:
            autoclose = True
    else:
        if autoclose is None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # draw tree
    for node in tree:
        x, y = layout[node]
        px, py = layout[node.parent]
        trans = False

        if node.parent:
            # a branch is a transfer when the parent's species node is not
            # on the path from this node's species node up to the root
            snode = brecon[node][-1][0]
            psnode = brecon[node.parent][-1][0]
            while snode:
                if psnode == snode:
                    break
                snode = snode.parent
            else:
                trans = True

        if not trans:
            canvas.line(x, y, px, py, color=tree_color)
        else:
            # dashed curve; both Bezier control points sit at the branch
            # midpoint shifted left by `arch`
            arch = 20
            cx = (x * .5 + px * .5) - arch
            cy = (y * .5 + py * .5)

            canvas.write("<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                         (x, y, cx, cy,
                          cx, cy, px, py,
                          " style='stroke-dasharray: 4, 2' " +
                          svg.colorFields(tree_trans_color, (0, 0, 0, 0))))

    # draw events
    o = event_size / 2.0
    for node in tree:
        snode, event = brecon[node][-1]
        x, y = layout[node]

        if event == "dup":
            canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=dup_color,
                        strokeColor=dup_color_border)
        elif event == "trans":
            canvas.rect(x - o, y - o, event_size, event_size,
                        fillColor=trans_color,
                        strokeColor=trans_color_border)

    # draw tree leaves
    for node in tree:
        x, y = layout[node]
        if node.is_leaf():
            canvas.text(node.name,
                        x + leaf_padding, y + font_size / 2., font_size,
                        fillColor=(0, 0, 0))

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
Beispiel #14
0
def draw_tree(tree,
              stree,
              extra,
              xscale=100,
              yscale=100,
              leaf_padding=10,
              label_size=None,
              label_offset=None,
              font_size=12,
              stree_font_size=20,
              canvas=None,
              autoclose=True,
              rmargin=10,
              lmargin=10,
              tmargin=0,
              bmargin=0,
              stree_color=(.4, .4, 1),
              snode_color=(.2, .2, .7),
              event_size=10,
              rootlen=None,
              stree_width=.8,
              filename=sys.stdout,
              labels=None,
              slabels=None):
    """Draw a gene tree inside its species tree, colored by locus.

    The species tree is laid out with treelib.layout_tree(); every gene-tree
    node is placed on the branch of the species node it maps to.  Branches
    are colored by the locus of the parent node, and a filled square marks
    every node whose locus differs from its parent's.

    Arguments:
    tree      -- gene tree (iterable over nodes, with preorder()/postorder())
    stree     -- species tree
    extra     -- dict with the keys
                   "species_map": gene node -> species node
                   "locus_map":   gene node -> locus
    xscale, yscale -- passed to treelib.layout_tree() for the species tree;
                 yscale also scales the vertical spread of gene nodes
                 inside a species branch
    leaf_padding -- horizontal gap between a leaf and its text label
    label_size -- size of the extra node labels (default: .7 * font_size)
    label_offset -- accepted for interface compatibility; not used
    font_size -- size of the gene leaf labels
    stree_font_size -- size of the species leaf labels
    canvas    -- canvas to draw on; if None, a new svg.Svg writing to
                 `filename` is created
    autoclose -- if true, the style and svg sections are ended before
                 returning.  None means "close only if the canvas was
                 created here"
    rmargin, lmargin, tmargin, bmargin -- margins added around the drawing
    stree_color, snode_color -- forwarded to draw_stree(); snode_color is
                 also used for the species leaf labels
    event_size -- side length of the locus-change squares
    rootlen   -- length of the stub drawn above the species root
                 (default: 10% of the largest x coordinate of the layout)
    stree_width -- fraction of yscale over which gene nodes are spread
                 within one species branch
    filename  -- output used when no canvas is given
    labels    -- mapping: gene node name -> text drawn at that node
    slabels   -- forwarded to draw_stree()
    """

    recon = extra["species_map"]
    loci = extra["locus_map"]

    # setup color map: one rainbow color per distinct locus
    all_loci = sorted(set(loci.values()))
    colormap = util.rainbow_color_map(low=0, high=len(all_loci) - 1)
    locus_color = dict((locus, colormap.get(ndx))
                       for ndx, locus in enumerate(all_loci))

    # set defaults
    font_ratio = 8. / 11.

    if label_size is None:
        label_size = .7 * font_size

    snames = dict((x, x) for x in stree.leaf_names())

    if labels is None:
        labels = {}
    if slabels is None:
        slabels = {}

    # layout stree
    slayout = treelib.layout_tree(stree, xscale, yscale)

    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout: add a pseudo-parent (key None) for the root, then shift
    # every entry (including the pseudo-parent) right by rootlen and up by
    # half a row
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in list(slayout.items()):
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout speciations and genes (y): ranked by preorder position within
    # their species node
    ylists = defaultdict(list)
    yorders = {}
    events = phylo.label_events(tree, recon)
    for node in tree.preorder():
        snode = recon[node]
        if events[node] in ("spec", "gene"):
            yorders[node] = len(ylists[snode])
            ylists[snode].append(node)

    # layout internal nodes (y): mean rank of the children
    for node in tree.postorder():
        if events[node] not in ("spec", "gene"):
            yorders[node] = stats.mean(
                [yorders[child] for child in node.children])

    # layout node (x): number of steps above the deepest child
    #
    # NOTE: an earlier, disabled prototype additionally shifted the x-orders
    # until the per-species-branch node ordering given in extra["order"] was
    # satisfied.  That ordering is currently not used by the layout.
    xorders = {}
    xmax = defaultdict(int)
    for node in tree.postorder():
        snode = recon[node]
        if events[node] in ("spec", "gene"):
            xorders[node] = 0
        else:
            xorders[node] = max(xorders[child]
                                for child in node.children) + 1
        xmax[snode] = max(xmax[snode], xorders[node])

    # setup layout
    layout = {None: slayout[None]}
    for node in tree:
        snode = recon[node]
        nx, ny = slayout[snode]
        px, py = slayout[snode.parent]

        # calc x: move from the species node towards its parent
        xfrac = xorders[node] / float(xmax[snode] + 1)
        deltax = nx - px
        x = nx - xfrac * deltax

        # calc y: point on the species branch at x, plus an offset by rank
        deltay = ny - py
        slope = deltay / float(deltax)
        offset = py + slope * (x - px)

        yfrac = (yorders[node] + 1) / float(max(len(ylists[snode]), 1) + 1)
        y = offset + (yfrac - .5) * stree_width * yscale

        layout[node] = (x, y)

    # layout label sizes
    max_label_size = max(len(x.name)
                         for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + .5 * yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        if autoclose is None:
            autoclose = True
    else:
        if autoclose is None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas,
               stree,
               slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color,
               slabels=slabels)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2.,
                        stree_font_size,
                        fillColor=snode_color)

    # draw tree: each branch takes the color of the parent's locus (the
    # root stub uses the root's own locus)
    for node in tree:
        x, y = layout[node]
        px, py = layout[node.parent]

        if node.parent:
            color = locus_color[loci[node.parent]]
        else:
            color = locus_color[loci[tree.root]]

        canvas.line(x, y, px, py, color=color)

    # draw tree names
    for node in tree:
        x, y = layout[node]

        if node.is_leaf():
            canvas.text(node.name,
                        x + leaf_padding,
                        y + font_size / 2.,
                        font_size,
                        fillColor=(0, 0, 0))

        if node.name in labels:
            canvas.text(labels[node.name],
                        x,
                        y,
                        label_size,
                        fillColor=(0, 0, 0))

    # draw events: a square wherever the locus changes along a branch
    o = event_size / 2.0
    for node in tree:
        if node.parent:
            locus = loci[node]

            if locus != loci[node.parent]:
                color = locus_color[locus]
                x, y = layout[node]

                canvas.rect(x - o,
                            y - o,
                            event_size,
                            event_size,
                            fillColor=color,
                            strokeColor=color)

    canvas.endTransform()

    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas