def draw_tree_regions(clusterrunid, t, ts, cur, greyout=3):
    '''
    Draw the neighborhoods around each of the genes in a gene tree given the cluster and run IDs and the tree (t)

    clusterrunid is the run ID to use to identify homologous clusters and ts is the treeStyle object associated with the
    ETE tree t

    cur is a SQLite cursor object for the database

    The arrows are grayed out if fewer than "greyout" genes appear in a given cluster.

    Returns the tuple (t, ts): the tree with a region image face attached to every leaf that has
    neighborhood data, and the tree style as returned by treelegend(). If no leaf has any
    neighborhood data, a warning is written to stderr and (t, ts) is returned without drawing anything.
    '''
    # Local import so that this block does not depend on the file's import header.
    from collections import Counter

    # DEPRECATED
    t, tblastnadded = removeLeadingDashes(t)

    unsanitized = [unsanitizeGeneId(genename) for genename in t.get_leaf_names()]

    # Create a list of SeqFeature objects for the neighbors of each gene in the tree
    # If passed a TBLASTN hit it will create seq objects for every gene surrounding the TBLASTN hit and
    # for the TBLASTN hit itself.
    #
    # Nothing is added if we can't find that ID in the database or the ID is badly formatted.
    seqfeatures = {}
    for genename in unsanitized:
        sys.stderr.write("Getting gene neighborhoods for gene %s...\n" %(genename) )
        features_for_genename = makeSeqFeaturesForGeneNeighbors(genename, clusterrunid, cur)
        if len(features_for_genename) > 0:
            seqfeatures[genename] = features_for_genename
        else:
            # Try TBLASTN and if that doesn't work, just give up.
            try:
                features_for_tblastn = makeSeqObjectsForTblastnNeighbors(genename, clusterrunid, cur)
                seqfeatures[genename] = features_for_tblastn
            except ValueError:
                sys.stderr.write("WARNING: Unable to find entries for gene or TBLASTN hit %s in database\n" %(genename) )

    # Don't bother trying the rest if nothing matches at all.
    if not seqfeatures:
        sys.stderr.write("WARNING: No genes in input tree had entries in the database so no neighborhoods will be drawn\n")
        return t, ts

    # Get a list of clusters containing these genes (one entry per feature, so duplicates are expected)
    allclusters = [
        feature.qualifiers["cluster_id"]
        for features in seqfeatures.values()
        for feature in features
    ]
    uniqueclusters = set(allclusters)

    # Count every cluster once up front instead of calling list.count() for each unique cluster,
    # which was quadratic in the number of features.
    clustercounts = Counter(allclusters)

    # Get clusters that have enough members to bother trying to color them (as determined by
    # the greyout keyword)
    multipleclusters = [c for c in uniqueclusters if clustercounts[c] >= greyout]

    # Don't die if nothing has enough clusters...
    if multipleclusters:
        getcolor = colormap(multipleclusters)
    else:
        getcolor = {}

    #also add in grey (0.5,0.5,0.5 in RGB) for all others
    singleclusters = [c for c in uniqueclusters if clustercounts[c] < greyout]
    getcolor.update([(sc, (0.5,0.5,0.5)) for sc in singleclusters])

    #generate the region images for any leaf that has them, and map onto the tree
    #we will want to know the max width to make the figures
    widths = []
    for genelocs in seqfeatures.values():
        start, end = regionlength(genelocs)
        widths.append(abs(end - start))
    maxwidth = max(widths)

    for leaf in t.iter_leaves():
        newname = unsanitizeGeneId(leaf.name)
        # Not all genes necessarily are in the database and we don't want to crash if that happens.
        # Instead, Just don't print a neighborhood for them.
        if newname not in seqfeatures:
            continue
        genelocs = seqfeatures[newname]
        sys.stderr.write("Making region drawing for gene ID %s...\n" %(newname))
        imgfileloc = make_region_drawing(genelocs, getcolor, newname, maxwidth)
        imageFace = faces.ImgFace(imgfileloc)
        leaf.add_face(imageFace, column=2, position = 'aligned')
        # Flag leaves that removeLeadingDashes() reported in its second return value.
        if newname in tblastnadded:
            leaf.add_face(TextFace("TBlastN added", fsize=30), column=3, position = 'aligned')

    #add legend for clusters
    ts = treelegend(ts, getcolor, greyout)

    return t, ts
Beispiel #2
0
    # Build one heatmap ProfileFace whose total width is numcols * options.data_width.
    # I give it 60 pixels per column by default
    # (so that the width doesn't shrink down too much when we have more than a few columns)
    # However, the user has the ability to change this if they need to / want to for larger data sets
    profileFace  = ProfileFace(matrix_max, matrix_min, matrix_avg, width=numcols*options.data_width, height=35, style="heatmap")
    # Attach the same face object to column 1 (aligned) of every leaf in the tree.
    for node in t.traverse():
        if node.is_leaf():
            node.add_face(profileFace, 1, position = "aligned")
    
    # Add the color bar (kind of hacked in from matplotlib since there is no convenient way to get it from ETE)
    # I could generate this in situ... for now I just have a file I like and run with it.
    # This doesn't match exactly because I don't have the time or motivation now to mess with QT to do it.
    # It should be pretty close though...
    # NOTE(review): ImgFace imported here is not referenced below in this excerpt (faces.ImgFace is used instead).
    from ete2 import ImgFace
    imgloc = os.path.join(locateRootDirectory(), "src", "internal", "Colormap.png")
    # Legend layout, left to right: minimum label, color bar image, maximum label.
    F1 = faces.TextFace("Minimum: %1.1f" %(matrix_min), ftype="Times", fsize=32 )
    F2 = faces.ImgFace(imgloc)
    F3 = faces.TextFace("%1.1f : Maximum" %(matrix_max), ftype="Times", fsize=32 )
    ts.legend.add_face(F1, 0)
    ts.legend.add_face(F2, 1)
    ts.legend.add_face(F3, 2)
    # Put it on the Bottom-left
    ts.legend_position = 3

# Optionally write the tree to "<basename>.nwk" using ETE write format 0.
if options.savenewick:
    t.write(outfile="%s.nwk" %(options.basename), format=0)

if options.savesvg:
    # Some versions of ETE create a "test.svg" and others do not.
    # To avoid confusion (and in case TreeStyle isn't enforced)
    # I just create a new one.
    # Remove any existing test.svg first; stderr is discarded so a missing file produces no message.
    os.system("rm test.svg 2> /dev/null")
Beispiel #3
0
def draw_tree_regions(clusterrunid, t, ts, cur, greyout=3, tempdir=None, label=False):
    '''
    Attach a gene-neighborhood drawing to every leaf of the gene tree t that has neighborhood data.

    clusterrunid is the run ID used to identify homologous clusters, ts is the treeStyle object
    that goes with the ETE tree t, and cur is a SQLite cursor object for the database.

    greyout is handed to makeClusterColorMap() and treelegend(); arrows are grayed out if fewer than
    "greyout" genes appear in a given cluster.

    tempdir is a temporary directory in which to store the results. The user is responsible for
    deleting this directory afterwards to clean up.

    label is forwarded unchanged to make_region_drawing().

    Returns the tuple (t, ts). If no leaf has any neighborhood data, a warning is written to stderr
    and the inputs are returned without any drawings or legend being added.
    '''

    leaf_ids = [unsanitizeGeneId(leafname) for leafname in t.get_leaf_names()]

    # Map each gene ID to the SeqFeature objects describing its neighborhood.
    # A regular gene lookup is tried first; if it yields nothing, the ID is retried as a TBLASTN hit
    # (which covers the hit itself and the genes surrounding it).
    #
    # IDs that cannot be found in the database, or are badly formatted, get no entry.
    neighborhoods = {}
    for gene_id in leaf_ids:
        sys.stderr.write("Getting gene neighborhoods for gene %s...\n" %(gene_id) )
        direct_hits = makeSeqFeaturesForGeneNeighbors(gene_id, clusterrunid, cur)
        if len(direct_hits) > 0:
            neighborhoods[gene_id] = direct_hits
            continue
        # Fall back to TBLASTN; give up on this ID if that fails as well.
        try:
            neighborhoods[gene_id] = makeSeqObjectsForTblastnNeighbors(gene_id, clusterrunid, cur)
        except ValueError:
            sys.stderr.write("WARNING: Unable to find entries for gene or TBLASTN hit %s in database\n" %(gene_id) )

    # Nothing matched at all, so there is nothing to draw.
    if not neighborhoods:
        sys.stderr.write("WARNING: No genes in input tree had entries in the database so no neighborhoods will be drawn\n")
        return t, ts

    # The color map is built from the features of all neighborhoods pooled together.
    pooled_features = [feature for features in neighborhoods.values() for feature in features]
    getcolor = makeClusterColorMap(pooled_features, greyout)

    # All drawings share the width of the widest region.
    region_bounds = (regionlength(features) for features in neighborhoods.values())
    spans = [abs(end - start) for start, end in region_bounds]
    maxwidth = max(spans)

    for leaf in t.iter_leaves():
        gene_id = unsanitizeGeneId(leaf.name)
        # Leaves without database entries simply get no neighborhood drawing.
        if gene_id not in neighborhoods:
            continue
        sys.stderr.write("Making region drawing for gene ID %s...\n" %(gene_id))
        drawing_path = make_region_drawing(neighborhoods[gene_id], getcolor, gene_id, maxwidth, tempdir=tempdir, label=label)
        leaf.add_face(faces.ImgFace(drawing_path), column=2, position='aligned')

    # Add the cluster legend to the tree style.
    ts = treelegend(ts, getcolor, greyout, clusterrunid, cur)

    return t, ts