Python fignum Beispiele, compbio.utils.plots.fignum Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: reinitz.py Projekt: bh0085/compbio

def get_reinitz_data(**kwargs):
    """Slice one block of per-nucleus coordinates and values from each data file.

    Keyword arguments read here:
        ofs: index of the block of ``nuc_count`` rows to extract (default 0).
        plot_coords: when true, save a scatter of the two coordinate columns.
        plot_vals: when true, save a scatter of the first coordinate column
            against the value column.
    The complete ``kwargs`` dict is also forwarded to ``datafiles`` through
    ``mem.rc``.

    Returns:
        ``(coords, values)``: two dicts keyed like the ``datafiles`` result.
        ``coords[k]`` holds columns 1:3 and ``values[k]`` the last column of
        the selected rows.
    """
    offset = kwargs.get('ofs', 0)
    want_coord_plot = kwargs.get('plot_coords', False)
    want_value_plot = kwargs.get('plot_vals', False)

    idm = id_map()  # result is not used below; the call itself is retained
    tables = datafiles(**mem.rc(kwargs))

    # Author's note: it is unclear exactly how this dataset is laid out, but
    # every nucleus carries a run of numbers that look monotonically
    # increasing, so only the first instance is taken.
    first_columns = {}
    for name, table in tables.iteritems():
        first_columns[name] = table[:, 0]
    # Number of distinct entries in the first column of the table that sits
    # at position 2 of the dict's value list.
    nuc_count = len(set(first_columns.values()[2]))

    row_lo = nuc_count * offset
    row_hi = nuc_count * (offset + 1)
    values = {}
    for name, table in tables.iteritems():
        values[name] = table[row_lo:row_hi, -1]
    coords = {}
    for name, table in tables.iteritems():
        coords[name] = table[row_lo:row_hi, 1:3]

    def _save_scatter(y_for_key, stem):
        # One translucent point cloud per key on figure 1, saved under `stem`.
        fig = myplots.fignum(1, (8, 8))
        axes = fig.add_subplot(111)
        palette = mycolors.getct(len(values))
        for idx, key in enumerate(values.keys()):
            axes.scatter(coords[key][:, 0], y_for_key(key), 10,
                         edgecolor='none', alpha=.25, c=palette[idx],
                         label=key)
        fig.savefig(myplots.figpath(stem.format(offset)))

    # The plots are a basic consistency check of the data; only the block
    # selected by `ofs` is drawn.
    if want_coord_plot:
        _save_scatter(lambda key: coords[key][:, 1],
                      'reinitz_exprdata_coords_nuc_offset={0}')
    if want_value_plot:
        _save_scatter(lambda key: values[key],
                      'reinitz_exprdata_ap_vals_nuc_offset={0}')

    return coords, values

Beispiel #2

0

Datei anzeigen

Datei: utils.py Projekt: bh0085/projects

def show_subopts(structs, polys, energies):
    """Draw ``polys`` as a grid ordered by energy and shaded red by energy.

    Each energy is min-max normalised to [0, 1] and used as the red channel of
    an RGB colour. ``structs`` is accepted but not used.
    """
    order = argsort(energies)
    e_arr = array(energies)
    normalised = (e_arr - min(e_arr)) / (max(e_arr) - min(e_arr))
    # Broadcast the (n, 1) column of weights against a pure-red RGB triple.
    cols = [1., 0., 0.] * normalised[:, newaxis]

    plf2 = myplots.fignum(7, (10, 10))

    rplots.grid_rnas(polys[order],
                     colors=cols[order],
                     size=(8, 8),
                     dims=[180, 50])

Beispiel #3

0

Datei anzeigen

Datei: utils.py Projekt: bh0085/projects

def cluster_2_show(clusters, polys):
    """Draw ``polys`` as a grid ordered by cluster label, one colour per label.

    ``clusters`` and ``polys`` are indexed with the argsort of ``clusters``,
    so both must support array-style fancy indexing.
    """
    order = argsort(clusters)

    # Assign one colour from the colour table to every distinct label.
    labels = set(clusters)
    palette = mycolors.getct(len(labels))
    colour_of = {}
    for position, label in enumerate(labels):
        colour_of[label] = palette[position]

    plf2 = myplots.fignum(8, (10, 10))

    ordered_colours = [colour_of[label] for label in clusters[order]]
    rplots.grid_rnas(polys[order],
                     colors=ordered_colours,
                     size=(5, 5),
                     dims=[180, 50])

Beispiel #4

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def show_rna_structs(xvals, yvals, structs, energies, pfracs,
                     rname, rtype,ns,rfid,figsize,colors,
                     seq, n, selection_type,vert_idxs):
    """Draw the selected structures at their (x, y) positions and save the figure.

    Args:
        xvals, yvals: positions, indexed by the entries of ``vert_idxs``; also
            used to set the padded axis limits.
        structs: mapping whose ``'structs'`` entry is indexed by ``vert_idxs``.
        energies, pfracs, ns: accepted but not used here.
        rname, rtype, n, selection_type: only used to build the output name.
        rfid, seq: forwarded to ``rutils.struct_verts``.
        figsize: forwarded to ``myplots.fignum``.
        colors: per-structure line colours, indexed by the entries of
            ``vert_idxs``.
        vert_idxs: indices of the structures to draw.

    Returns:
        ``vert_idxs``, unchanged.
    """
    verts = rutils.struct_verts([structs['structs'][i] for i in vert_idxs],
                                seq, rfid)

    f = myplots.fignum(3, figsize)
    ax = f.add_subplot(111)
    myplots.padded_limits(ax, xvals, yvals, .2)

    dims = [30]
    shadow_width = 10
    for vi, v in enumerate(verts):
        i = vert_idxs[vi]
        position = [xvals[i], yvals[i]]

        # Opaque white backing drawn underneath every structure.
        backing = {'linewidth': shadow_width,
                   'color': 'white',
                   'alpha': 1,
                   'zorder': 1.1}
        rplots.show_rna(position, v, dims=dims, pkw=backing)

        # Translucent white halo at this structure's own z-level.
        halo = {'linewidth': shadow_width,
                'color': 'white',
                'alpha': .8,
                'zorder': vi + 2}
        rplots.show_rna(position, v, dims=dims, pkw=halo)

        # The coloured outline itself.
        outline = {'linewidth': 2,
                   'color': colors[i],
                   'zorder': vi + 2}
        rplots.show_rna(position, v, dims=dims, pkw=outline)

    # Labels and caption are set once, after all structures are drawn.
    # Previously this sat inside the loop, which stacked one identical
    # annotation per structure and left the axes unlabelled when there were
    # no structures.
    ax.set_ylabel('mutation score')
    ax.set_xlabel('free energy (-kCal)')
    ax.annotate('''Suboptimal foldings, positioned by energy and
a mutation based evolutionary score.
Color indicates a second score from paired BL.''', [0, 1],
                xycoords='axes fraction',
                xytext=[10, -10], textcoords='offset pixels',
                va='top')

    f.savefig(figfile.format('{3}_frac_silent_{0}_{1}{2}'.
                             format(rname, selection_type, n, rtype)))
    return vert_idxs

Beispiel #5

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def show_rna_structs(xvals, yvals, structs, energies, pfracs, rname, rtype, ns,
                     rfid, figsize, colors, seq, n, selection_type, vert_idxs):
    """Draw the selected structures at their (x, y) positions and save the figure.

    Args:
        xvals, yvals: positions, indexed by the entries of ``vert_idxs``; also
            used to set the padded axis limits.
        structs: mapping whose ``'structs'`` entry is indexed by ``vert_idxs``.
        energies, pfracs, ns: accepted but not used here.
        rname, rtype, n, selection_type: only used to build the output name.
        rfid, seq: forwarded to ``rutils.struct_verts``.
        figsize: forwarded to ``myplots.fignum``.
        colors: per-structure line colours, indexed by the entries of
            ``vert_idxs``.
        vert_idxs: indices of the structures to draw.

    Returns:
        ``vert_idxs``, unchanged.
    """
    verts = rutils.struct_verts([structs['structs'][i] for i in vert_idxs],
                                seq, rfid)

    f = myplots.fignum(3, figsize)
    ax = f.add_subplot(111)
    myplots.padded_limits(ax, xvals, yvals, .2)

    dims = [30]
    shadow_width = 10

    for vi, v in enumerate(verts):
        i = vert_idxs[vi]
        # Three passes per structure, in drawing order: an opaque white
        # backing, a translucent white halo at the structure's own z-level,
        # then the coloured outline.
        layers = [
            {'linewidth': shadow_width, 'color': 'white', 'alpha': 1,
             'zorder': 1.1},
            {'linewidth': shadow_width, 'color': 'white', 'alpha': .8,
             'zorder': vi + 2},
            {'linewidth': 2, 'color': colors[i], 'zorder': vi + 2},
        ]
        for pkw in layers:
            rplots.show_rna([xvals[i], yvals[i]], v, dims=dims, pkw=pkw)

    # Axis labels and the caption are loop-invariant: set them once after the
    # drawing loop instead of once per structure, which used to stack
    # duplicate annotations and skipped the labels for an empty selection.
    ax.set_ylabel('mutation score')
    ax.set_xlabel('free energy (-kCal)')
    ax.annotate('''Suboptimal foldings, positioned by energy and
a mutation based evolutionary score.
Color indicates a second score from paired BL.''', [0, 1],
                xycoords='axes fraction',
                xytext=[10, -10],
                textcoords='offset pixels',
                va='top')

    f.savefig(figfile.format('{3}_frac_silent_{0}_{1}{2}'.
                             format(rname, selection_type, n, rtype)))
    return vert_idxs

Beispiel #6

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view2():
    """Scatter each numeric input parameter against the runs' improve_ratio.

    Loads the input/output records of every tenth file in ``batch/outputs``
    whose name contains "mcmc", draws one horizontal strip of axes per input
    parameter (string-valued parameters leave their strip empty), and saves
    the figure to ``figs/soheil/broad_run0_psplits.ps``.

    Returns:
        The list of loaded input records.
    """
    files = [l for l in os.listdir(cfg.dataPath("batch/outputs")) if "mcmc" in l]
    ids = [l[0:10] for l in files][::10]

    inps = [butils.load_data(i, "input") for i in ids]
    outs = [butils.load_data(i, "output") for i in ids]

    # Placeholder selection: every run is kept. (An improve_ratio threshold
    # was used here at some point and can be reinstated by changing this set.)
    idxs_good = set(range(len(outs)))
    outs = [o for idx, o in enumerate(outs) if idx in idxs_good]
    inps = [i for idx, i in enumerate(inps) if idx in idxs_good]

    params = list(inps[0].keys())
    strip_height = 1.0 / len(params)

    f = myplots.fignum(1, (8, 8))

    def _jitter(vals):
        # Add uniform noise of up to 1/50 of the data range. The values are
        # converted to an array first: `+=` on a Python list would *extend*
        # the list with the noise samples instead of adding them elementwise.
        vals = array(vals, dtype=float)
        return vals + random.rand(*vals.shape) * (vals.max() - vals.min()) / 50

    for i, p in enumerate(params):
        ax = f.add_axes([0.05, i * strip_height, 0.9, strip_height], title=p)

        xvals = [elt.get(p) for elt in inps]
        if isinstance(xvals[0], str):
            # Non-numeric parameter: leave this strip empty.
            continue
        yvals = [elt.get("improve_ratio") for elt in outs]

        ax.scatter(_jitter(xvals), _jitter(yvals))
        ax.annotate(p, [0, 0], xycoords="axes fraction", ha="left", va="bottom")

    f.savefig(cfg.dataPath("figs/soheil/broad_run0_psplits.ps"))

    # A leftover debugging `raise Exception()` used to sit here and made the
    # return below unreachable.
    return inps

Beispiel #7

0

Datei anzeigen

def tree_similarity(dist1, dist2, run_id,criterion = 'knn', k = 6):
    """Compare two distance matrices through their k-nearest-neighbour sets.

    For every row, the k nearest columns under each matrix are taken
    (skipping the first sorted entry, presumably the row itself), the
    distances to the union of both neighbour sets are collected under both
    matrices, and the k pairs with the smallest minimum distance are kept.
    The figure shows the absolute per-pair differences (left) and a scatter of
    dist1 against dist2 (right), annotated with R-squared and the mean Jaccard
    index of the neighbour sets, and is saved as a TIFF named after
    ``run_id`` and ``k``.

    Nothing happens for any ``criterion`` other than ``'knn'``.
    """
    if criterion != 'knn':
        return

    nq = len(dist1)
    nb1 = argsort(dist1, 1)[:, 1:k + 1]
    nb2 = argsort(dist2, 1)[:, 1:k + 1]

    all_nbs = []
    nb_intersection = []
    for row1, row2 in zip(nb1, nb2):
        all_nbs.append(set(row1).union(set(row2)))
        nb_intersection.append(set(row1).intersection(set(row2)))

    # (dist1, dist2) pairs from each query to every neighbour in its union.
    pair_dists = []
    for i, nbs in enumerate(all_nbs):
        pair_dists.append(array([[dist1[i, n], dist2[i, n]] for n in nbs]))
    n_disagreements = [len(pairs) - k for pairs in pair_dists]
    # Keep the first k pairs when ordered by the smaller of the two distances.
    nb_dists = array([sorted(pairs, key=min)[:k] for pairs in pair_dists])

    frac_diffs = [abs(diff(elt, 1).flatten()) / mean(elt, 1) for elt in nb_dists]
    abs_diffs = [abs(diff(elt, 1).flatten()) for elt in nb_dists]

    ct = mycolors.getct(nq)
    f = myplots.fignum(4, (10, 8))
    left_ax = f.add_axes([.05, .08, .25, .87])
    seismic.seismic(abs_diffs, ax=left_ax, colors=ct)

    overlaps = [float(len(common)) / float(len(union))
                for common, union in zip(nb_intersection, all_nbs)]
    jaccard = mean(overlaps)

    ax2 = f.add_axes([.34, .08, .6, .87])
    for i, d in enumerate(nb_dists):
        ax2.scatter(d[:, 0], d[:, 1], 20, alpha=.5, color=ct[i])

    lin = linregress(nb_dists[:, :, 0].flatten(), nb_dists[:, :, 1].flatten())
    rsquared = lin[2] ** 2

    caption = 'NN dists for multi/struct-aligned trees.\nK = {0}'.format(k)
    ax2.annotate(caption,
                 [0, 1], xycoords='axes fraction', va='top',
                 xytext=[10, -10], textcoords='offset pixels')
    stats = 'R-Squared: {0:3.3}\nJaccard Index: {1:3.3}'.format(rsquared, mean(jaccard))
    ax2.annotate(stats,
                 [1, 0], xycoords='axes fraction', ha='right',
                 xytext=[-10, 10], textcoords='offset pixels')
    ax2.set_xlabel('Muscle aligned tree distances')
    ax2.set_ylabel('Struct algined tree distances')

    datafile = cfg.dataPath('figs/gpm2/pt2_mus_cm_tree_dists_{0}_k{1}.tiff'.format(run_id, k))
    f.savefig(datafile)

Beispiel #8

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def show_errors(errors, staysames, improves, gnames):
    """Plot smoothed squared errors next to a stacked worse/same/better chart.

    Left axes: for the first entry of ``errors`` only, the first five
    flattened elements are squared, median-filtered with a window of 51 and
    plotted. Right axes: a stacked seismic plot of the "get worse"
    (``1 - (staysames + improves)``), "stay same" and "improve" fractions in
    red, black and blue. ``gnames`` is accepted but not used.
    """
    figtitle = "show_errors"
    f = myplots.fignum(3, (12, 6))
    err_ax = f.add_axes([0.05, 0.05, 0.25, 0.9])

    import scipy.signal as ss

    for all_errs in errors[:1]:
        for err in all_errs.flatten()[:5]:
            smoothed = ss.medfilt(err.flatten() ** 2, 51)
            err_ax.plot(smoothed)

    get_worse = 1 - (array(staysames) + array(improves))
    ax2 = f.add_axes([0.3, 0.05, 0.65, 0.9])
    stacked_series = squeeze([get_worse, staysames, improves])
    seismic.seismic(stacked_series,
                    stacked=True,
                    colors=[[1, 0, 0], [0, 0, 0], [0, 0, 1]],
                    ax=ax2,
                    linewidth=10,
                    label_y=False)

    f.savefig(figtemplate.format(figtitle))

Beispiel #9

0

Datei anzeigen

Datei: reinitz.py Projekt: bh0085/compbio

def check_network(net_name = 'binding', 
                  dataset_name = 'reinitz',
                  data_ofs = 4,
                  max_edges = -1,
                  node_restriction = 'reinitz'):
    """Histogram the expression correlations across a network's edges.

    Loads the expression values for ``dataset_name`` ('reinitz', 'bdtnp' or
    'tc'), selects the network named by ``net_name`` ('binding', 'unsup',
    'logistic' or 'clusters'), keeps the edges whose two endpoints both have
    expression values, and plots a histogram plus a scaled KDE of the
    non-NaN correlation coefficients. The figure is saved under a name built
    from the arguments.

    Raises:
        Exception: for an unknown ``dataset_name`` or ``net_name``.
    """
    # Always loaded; only consulted for the 'tc' dataset with the 'reinitz'
    # node restriction.
    reinitz_keys = set(get_reinitz_data()[1].keys())

    if dataset_name == 'reinitz':
        coords, values = get_reinitz_data(ofs=data_ofs)
    elif dataset_name == 'bdtnp':
        data = nio.getBDTNP()
        meta = nio.getBDTNP(misc=True)
        values = {}
        for name, entry in data.iteritems():
            values[name] = entry['vals'][:, data_ofs]
        coords = array([meta['x']['vals'][:, data_ofs],
                        meta['y']['vals'][:, data_ofs]])
    elif dataset_name == 'tc':
        data = nio.getTC()
        if node_restriction == 'reinitz':
            restricted = []
            for name, entry in data.iteritems():
                if name in reinitz_keys:
                    restricted.append((name, entry))
            data = dict(restricted)
        values = data
    else:
        raise Exception('data set {0} not yet implemented'.format(dataset_name))

    nets = comp.get_graphs()
    if net_name == 'binding':
        network = nets['bn']
    elif net_name == 'unsup':
        network = nets['unsup']
    elif net_name == 'logistic':
        network = nets['logistic']
    elif net_name == 'clusters':
        network = get_soheil_network(max_edges=max_edges,
                                     node_restriction=values.keys())
    else:
        raise Exception('type not implemented: {0}'.format(net_name))

    nodes = values.keys()
    nodes_allowed = set(nodes)

    f = myplots.fignum(1, (8, 8))
    ax = f.add_subplot(111)

    # For every node, the network neighbours that also have expression data.
    targets = {}
    for node in nodes:
        if node in network:
            targets[node] = nodes_allowed.intersection(network[node].keys())
        else:
            targets[node] = []

    xax = linspace(-1, 1, 20)

    edges = [(src, dst)
             for src, dsts in targets.iteritems()
             for dst in dsts]

    # Correlation per edge, dropping undefined (NaN) coefficients.
    ccofs = []
    for tf, tg in edges:
        coefficient = corrcoef(values[tf], values[tg])[0, 1]
        if not isnan(coefficient):
            ccofs.append(coefficient)

    count, kde = make_kde(ccofs)

    ax.hist(ccofs, xax, label=net_name)
    h = histogram(ccofs, xax)
    # Scale the density to the tallest histogram bin so both share one axis.
    ax.fill_between(xax, kde(xax) * max(h[0]), label=net_name, zorder=1, alpha=.5)

    details = '\n{2} data (data offset={0})\n(net_name={1})\n(max_edges={3})'.format(
        data_ofs, net_name, dataset_name, max_edges)
    title = 'edge correlations kde for {0}'.format(details)
    myplots.maketitle(ax, title,
                      subtitle='n_edges = {0}'.format(len(edges)))
    ax.legend()

    figname = 'network_edge_corrs_data_ofs={0}_net={1}_expr={2}_max_edges={3}'.format(
        data_ofs, net_name, dataset_name, max_edges)
    f.savefig(myplots.figpath(figname))

Beispiel #10

0

Datei anzeigen

Datei: heatmaps.py Projekt: bh0085/compbio

def srt_heatmap(net = 3,
                all_module = False):
    """Sort a module-by-GO-term matrix, dump it as text and save a heatmap.

    The matrix returned by ``load`` is reordered with ``bs.run0`` and its
    columns are reversed. The sorted matrix is written, with a header line and
    the sorted column/row labels, to ``daniel/heatmaps_sorted/hm_net<net>.txt``
    and drawn as a heatmap saved next to it as a PDF.

    Args:
        net: network number; selects the input data and output file names
            (net 3 gets a taller figure).
        all_module: when false, the entry labelled ``'all'`` is dropped from
            the plotted matrix and from the tick labels.

    Raises:
        ValueError: if ``all_module`` is false and no label equals ``'all'``,
            or if a line of the accession map does not split into exactly two
            tab-separated fields.
    """
    import compbio.projects.bsort.bsort as bs

    arr, cols, rows = load(net=net,
                           max_go_modules=15,
                           min_go_size=5,
                           min_module_size=10)

    arr2_510, srts = bs.run0(arr=arr, itr=2, meth='moment')
    arr2_510 = arr2_510[:, ::-1]
    # Each returned permutation is matched to columns or rows by its length.
    csrts = [s for s in srts if len(s) == len(cols)]
    rsrts = [s for s in srts if len(s) == len(rows)]

    for c in csrts:
        cols = cols[c]
    for r in rsrts:
        rows = rows[r]

    # `with` guarantees the dump is flushed and closed even if a write fails.
    out_path = cfg.dataPath('daniel/heatmaps_sorted/hm_net{0}.txt'.format(net))
    with open(out_path, 'w') as fout:
        fout.write('FORMAT: L1 :GO Terms (Columns), L2: Modules (Rows), L3+ Pvals thresholded between .01, .001\n')
        fout.write('\t'.join([str(elt) for elt in cols]) + '\n')
        fout.write('\t'.join([str(squeeze(elt)) for elt in rows]) + '\n')
        for row in arr2_510:
            fout.write('\t'.join(['{0}'.format(elt) for elt in row]) + '\n')

    f = myplots.fignum(3, (8, 40)) if net == 3 else myplots.fignum(3, (8, 10))
    ax = f.add_axes([.4, .05, .55, .9], aspect='auto')

    goterms = [str(elt) for elt in cols]

    if not all_module:
        idx_omit = goterms.index('all')
        arr2_510 = vstack((arr2_510[:idx_omit], arr2_510[idx_omit + 1:]))
        goterms = goterms[:idx_omit] + goterms[idx_omit + 1:]

    # Accession -> readable name. The handle used to be left open.
    gotexts = {}
    with open(cfg.dataPath('daniel/go_accession_name_map.txt')) as fmap:
        for line in fmap:
            k, v = line.split('\t')
            gotexts[k] = v
    # Fall back to the raw accession when the map has no entry for it.
    row_labels = [gotexts.get(g, g).strip() for g in goterms]

    ax.set_yticks(arange(len(goterms)) + .25)
    ax.set_yticklabels(row_labels, size='4')
    ax.set_xlabel('Modules')

    ax.set_xticks(arange(len(rows)) + .25)
    ax.set_xticklabels([str(int(r[0])) for r in rows],
                       rotation=90,
                       size='small')

    im = ax.imshow(arr2_510[:, :] * 4 + 2,
                   cmap=plt.get_cmap('OrRd'),
                   aspect='auto',
                   interpolation='nearest')
    plt.colorbar(im)
    f.savefig(cfg.dataPath('daniel/heatmaps_sorted/hm_net{0}_{1}.pdf'.
                           format(net, 'with_all' if all_module else 'no_all')))

Beispiel #11

0

Datei anzeigen

def show_conservation(fidx = 0, reset = False):
    """Plot per-structure conservation scores on a 2-D PCA of the structures.

    The family number is looked up in ``flist`` by ``fidx``. Family data and
    tree data are fetched through ``mem.getOrSet`` (recomputed when ``reset``
    is true). Every structure is projected with ``rutils.project_structs``
    and coloured in red by its mean ``sutils.metric`` value; the structures
    returned by ``sutils.show_paired_v_energy`` are highlighted. One figure
    per metric is saved via ``figfile``. When the module flag
    ``do_make_subopts`` is set, a grid of 400 suboptimal structures is saved
    as well.
    """
    fnum = flist[fidx]
    rfid = 'RF{0:05}'.format(fnum)
    print(rfid)
    ftype = 'riboswitch' if fnum == 50 else 'all'

    fam_request = mem.rc({}, reset=reset,
                         on_fail='compute',
                         hardcopy=False,
                         register='fdat' + rfid,
                         ftype=ftype,
                         rfid=rfid)
    out = mem.getOrSet(setFamData, **fam_request)

    tree_request = mem.rc({}, reset=reset,
                          on_fail='compute',
                          hardcopy=True,
                          register='st' + rfid,
                          rfid=rfid,
                          ftype=ftype)
    mvals, tvals, structs = mem.getOrSet(setTree, **tree_request)

    idxs, tidx = sutils.show_paired_v_energy(rfid, rfid, mvals, tvals,
                                             structs, ftype)

    all_pairs = structs['structs']
    all_energies = structs['energies']

    # Per-selection views of the structures, energies, mutations and times.
    pints = [structs['structs'][i] for i in idxs]
    eints = [structs['energies'][i] for i in idxs]
    mints = [mvals[tidx][i] for i in idxs]
    tints = [tvals[tidx][i] for i in idxs]
    seq = structs['seq']

    if do_make_subopts:
        subopts = rutils.suboptimals(seq, n=400)
        verts = rutils.struct_verts(subopts, seq, rfid)
        f = myplots.fignum(4, figsize)
        rplots.grid_rnas(verts, dims=[40])
        f.savefig(figfile.format('{0}_grid_rnas'.format(rfid)))

    aff = rutils.struct_affinity_matrix(all_pairs, len(seq))
    pca = rutils.project_structs(all_pairs,
                                 ptype='pca',
                                 affinities=aff,
                                 n_comp=3)

    # Other metrics tried by the author: 'frac_silent', 'frac_paired'.
    for metric in ['n_comp']:
        scolors = []
        for i in range(len(tvals[tidx])):
            m_silent, pidxs, frac_good = sutils.metric(
                mvals[tidx][i], tvals[tidx][i],
                mtype=metric)
            scolors.append(mean(m_silent))
        # Rescale the scores to [0, 1] and use them as the red channel.
        scolors = myplots.rescale(scolors, [0., 1.])[:, newaxis] * array([1., 0., 0.])

        f = myplots.fignum(4, figsize)
        ax = f.add_subplot(111)
        xvals, yvals = pca[:, :2].T
        myplots.padded_limits(ax, xvals, yvals)

        ax.scatter(xvals, yvals, 300, linewidth=1,
                   edgecolor='black', color=scolors)

        # Highlight the selected structures: a black disc, a slightly smaller
        # white disc on top of it, then the coloured marker in the centre.
        sel_x = pca[idxs, 0]
        sel_y = pca[idxs, 1]
        ax.scatter(sel_x, sel_y, 2100, alpha=1, color='black')
        ax.scatter(sel_x, sel_y, 2000, alpha=1, color='white')
        ax.scatter(sel_x, sel_y, 400, alpha=1, color=scolors[idxs])

        caption = ('Conservation metric: {0}\n'
                   'Projected onto C=2 Principal Components').format(metric)
        ax.annotate(caption,
                    [0, 1], xycoords='axes fraction', va='top',
                    xytext=[10, -10], textcoords='offset points')

        f.savefig(figfile.format('{0}_pca_{1}'.format(rfid, metric)))

Beispiel #12

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view3():
    """Scatter the model terms of one batch/tmp run, ordered by cluster.

    Exploratory/debugging routine: it never returns normally. It raises a
    bare ``Exception`` right after plotting the first eligible file, or after
    the loop when no file was plotted.
    """

    # Candidate files are those in batch/tmp whose name contains "mcmc"; the
    # first ten characters of each name are used as the run id.
    files = [l for l in os.listdir(cfg.dataPath("batch/tmp")) if "mcmc" in l]
    fpaths = [os.path.join(cfg.dataPath("batch/tmp"), f) for f in files]
    ids = [l[0:10] for l in files]

    # Keep only the runs whose input record reports out_iter_num > 2.
    inps = [butils.load_data(i, "input") for i in ids]
    idxs_good = nonzero(greater([elt.get("out_iter_num") for elt in inps], 2))[0]
    inps = [inps[i] for i in idxs_good]
    fpaths = [fpaths[i] for i in idxs_good]

    fig = myplots.fignum(3, (35, 15))
    ax = fig.add_axes([0, 0, 1, 1])

    for f, inp in zip(fpaths, inps):
        # Already excluded by the > 2 filter above, so this never triggers.
        if inp["out_iter_num"] == 2:
            continue
        print inp["filename"]

        data = sio.loadmat(f)

        import compbio.utils.colors as mycolors

        # One colour per entry of data["gene_names"].
        ct = mycolors.getct(len(data["gene_names"]))

        # Flatten one nesting level per step: model -> terms -> factors.
        term_list = [list(it.chain(*mod)) for mod in data["model"]]
        fac_list = [list(it.chain(*t)) for t in term_list]

        # One group of points per (model i, term, factor): x is the model
        # index, y the factor, colours come from the sorted term, and the
        # radii shrink across the group.
        xvals, yvals, colors, rads = [], [], [], []
        for i, terms in enumerate(term_list):
            for j, term in enumerate(terms):
                for k, fact in enumerate(term):
                    xvals.extend([i] * len(term))
                    yvals.extend([fact] * len(term))
                    colors.extend([ct[c] for c in sorted(term)])
                    rads.extend(((arange(1, len(term) + 1) ** 2) * 50)[::-1])

        # Indicator matrix: vecs[i, f] = 1 when factor f occurs in model i.
        # Assumes every factor is a valid index < len(fac_list) -- TODO confirm.
        vecs = zeros((len(fac_list), len(fac_list)))
        for i, fl in enumerate(fac_list):
            # NOTE: `f` shadows the file path of the outer loop; the path has
            # already been consumed by sio.loadmat above.
            for f in fl:
                vecs[i, f] = 1

        # plt.imshow(vecs)

        # ax1 = fig.add_subplot(121)
        # ax2 = fig.add_subplot(122)
        import hcluster

        clusters = hcluster.fclusterdata(vecs, 1.1, criterion="inconsistent", method="complete")

        # ax1.imshow(vecs)
        # ax2.imshow(vecs[argsort(clusters)])

        # raise Exception()

        # Double argsort: the rank of each model when ordered by cluster
        # label; used to remap the x positions.
        csrt = argsort(argsort(clusters))
        xvals2 = [csrt[x] for x in xvals]

        # raise Exception()
        plt.scatter(xvals2, yvals, rads, color=colors)
        # Debug stop: aborts after the first plotted file.
        raise Exception()

    # Debug stop: reached only when the loop body never ran to completion.
    raise Exception()

Beispiel #13

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

def view4_show0(cnames, xvals, gvals, yvals, colors, rads, l_info, gnum=59):
    """Plot the point groups belonging to gene ``gnum`` over clustered columns.

    ``xvals``, ``yvals``, ``gvals``, ``colors`` and ``rads`` are parallel
    nested sequences (one inner sequence per group). Groups of the selected
    gene that would land on an already used (x, y) start position are shifted
    diagonally in steps of 0.3 until the position is free. The points of the
    selected gene are then drawn at the rank of their cluster (complete-link
    hierarchical clustering of an x-by-y occupancy matrix) and the figure is
    saved via ``figtemplate``. ``cnames`` and ``l_info`` are accepted but not
    used.
    """
    seen = set()
    offs_mag = 0.3
    xofs, xnew, yofs, ynew = [], [], [], []
    for xs, ys, gs in zip(xvals, yvals, gvals):
        origin = (xs[0], ys[0])
        # Nudge the position until it is not yet taken.
        shifted = tuple(origin)
        while shifted in seen:
            shifted = tuple([coord + offs_mag for coord in shifted])

        group_size = len(xs)
        xnew.append([shifted[0]] * group_size)
        ynew.append([shifted[1]] * group_size)
        xofs.append([shifted[0] - origin[0]] * group_size)
        yofs.append([shifted[1] - origin[1]] * group_size)

        # Only groups of the selected gene reserve their position.
        if gs[0] != gnum:
            continue
        seen.add(shifted)

    # Flat indices of all points that belong to the selected gene.
    g_equal = nonzero(equal(list(it.chain(*gvals)), gnum))[0]
    if len(g_equal) == 0:
        print("G {0} appears to not be in the list".format(gnum))

    def _selected(nested):
        # Flatten one nesting level and keep the selected gene's entries.
        return array(list(it.chain(*nested)))[g_equal]

    xvals_old = _selected(xvals)
    yvals_old = _selected(yvals)
    xvals = _selected(xnew)
    yvals = _selected(ynew)
    xofs = _selected(xofs)
    yofs = _selected(yofs)
    colors = _selected(colors)
    rads = _selected(rads)

    # Occupancy matrix over the original (x, y) positions.
    vecs = zeros((max(xvals_old) + 1, max(yvals_old) + 1))
    for x, y in zip(xvals_old, yvals_old):
        vecs[x, y] = 1.0

    import mlpy

    HC = mlpy.HCluster(method="euclidean", link="complete")
    clusts = HC.compute(vecs)
    k = 15
    cut = HC.cut(HC.heights[-k])
    cut_s = sort(cut)
    # Rank of every row of `vecs` when ordered by cluster label.
    crank = argsort(argsort(cut))

    fig = myplots.fignum(3, (35, 15))
    ax = fig.add_axes([0, 0, 1, 1])

    clst_mems = [cut_s[rank] for rank in crank]
    # One grey level per distinct cluster label.
    clust_colors = ones(3) * linspace(0.2, 0.9, len(set(cut)))[:, newaxis]

    ax.scatter(crank, ones(len(crank)) * 0.5, 1000, color=clust_colors[clst_mems])

    ax.scatter(crank[xvals_old] + xofs, yvals, rads, color=colors)

    caption = "Functional motifs for gene: {0}\nIn {1} clusters".format("gene", 100)
    ax.annotate(caption,
                [0, 1],
                xycoords="axes fraction",
                va="top")

    figname = "gene_{0}_motif_recurrence_circles".format(gnum)
    fig.savefig(figtemplate.format(figname))

Beispiel #14

0

Datei anzeigen

Datei: view_outputs.py Projekt: bh0085/compbio

    outputs = [sio.loadmat(cfg.dataPath("batch/tmp/mcmc_{0:05}_tmp001.mat".format(num))) for num in fnums]

    douts = []
    for output in outputs:
        try:
            o00 = output["out_struct"][0][0]
            dout = dict([(k, o00[i]) for i, k in enumerate([elt[0] for elt in o00.dtype.descr])])
            douts.append(dout)
        except Exception, e:
            continue

    ss, ir = array([(squeeze(o["stay_same"]), squeeze(o["improve_ratio"])) for o in douts]).T
    ss += random.rand(*shape(ss)) / 100
    ir += random.rand(*shape(ir)) / 100

    f = myplots.fignum(1, (8, 8))
    f.clear()
    ax = f.add_subplot(111)
    ax.set_xlabel("Stay Same")
    ax.set_ylabel("Improve Ratio")
    plt.scatter(ss, ir, 5)


def view2():
    files = [l for l in os.listdir(cfg.dataPath("batch/outputs")) if "mcmc" in l]
    ids = [l[0:10] for l in files]
    ids = ids[::10]

    inps = [butils.load_data(i, "input") for i in ids]
    outs = [butils.load_data(i, "output") for i in ids]

Beispiel #15

0

Datei anzeigen

def draw_cm_muscle_congruencies(seqs, profiles, run_id, reset = True):
    """Compare trees built from muscle and structural (cmalign) alignments.

    Alignments of both types are fetched through ``mem.getOrSet`` (recomputed
    when ``reset`` is true). For every pair of alignments a BIONJ tree is
    built from each, the pairwise leaf distances of both trees are compared
    with ``tree_similarity`` (once over all other leaves, once with k = 6),
    and both trees are drawn as graph embeddings in a two-panel figure saved
    as PostScript.

    Args:
        seqs, profiles: forwarded to ``setAlignments``.
        run_id: tag used in the cache registers and in output file names.
        reset: forwarded to ``mem.rc``.
    """
    import networkx

    print('computing alignments...')
    print('  ...using muscle')
    malis, mrefs, mpairs = \
        mem.getOrSet(setAlignments,
                     **mem.rc({},
                              seqs=seqs, profiles=profiles,
                              run_id=run_id, ali_type='muscle',
                              reset=reset,
                              on_fail='compute',
                              register='tuali_musc_{0}'.format(run_id)))
    print('  ...using cmalign.')
    salis, srefs, spairs = \
        mem.getOrSet(setAlignments,
                     **mem.rc({},
                              seqs=seqs, profiles=profiles,
                              run_id=run_id, ali_type='struct',
                              reset=reset,
                              on_fail='compute',
                              register='tuali__struct_{0}'.format(run_id)))

    print('  ...making trees.')

    for idx, alis in enumerate(zip(malis, salis)):
        m, s = alis
        mtree = phyml.tree(m, run_id, bionj=True)
        stree = phyml.tree(s, run_id, bionj=True)

        # Row/column index of every sequence id in the distance matrices.
        maps = dict([(elt.id, i) for i, elt in enumerate(m)])
        mdists = zeros((len(maps), len(maps)))
        sdists = zeros((len(maps), len(maps)))
        for n1 in mtree.get_terminals():
            for n2 in mtree.get_terminals():
                mdists[maps[n1.name], maps[n2.name]] = \
                    mtree.distance(n1, n2)

        for n1 in stree.get_terminals():
            for n2 in stree.get_terminals():
                sdists[maps[n1.name], maps[n2.name]] = \
                    stree.distance(n1, n2)

        # NOTE(review): tree_similarity receives the structural distances
        # first; confirm that this matches the axis labels it draws.
        # All other leaves: k = n - 1. This used to read `len(sdists - 1)`,
        # which subtracts 1 from the matrix entries and still yields n, so
        # the "K =" annotation and the output file name were off by one.
        tree_similarity(sdists, mdists, '{0}_struct_{1}'.format(run_id, idx),
                        k=len(sdists) - 1)
        tree_similarity(sdists, mdists, '{0}_struct_{1}'.format(run_id, idx),
                        k=6)

        f = myplots.fignum(4, (8, 10))
        ct = mycolors.getct(len(mtree.get_terminals()))

        for t, sp, ttype in zip([mtree, stree], [211, 212], ['sequence', 'structural']):
            a = f.add_subplot(sp)
            layout = 'neato'
            G = phylo.to_networkx(t)
            Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False)
            posi = networkx.pygraphviz_layout(Gi, layout, args='')
            posn = dict((n, posi[Gi.node_labels[n]]) for n in G)

            # Only nodes whose name is a known sequence id get a visible
            # marker; all labels are blanked.
            networkx.draw(G, posn, labels=dict([(n, '') for n in G.nodes()]),
                          node_size=[100 if n.name in maps.keys() else 0 for n in G.nodes()],
                          width=1, edge_color='black',
                          ax=a,
                          node_color=[ct[maps.get(n.name, -1)] for n in G.nodes()])

            a.annotate('Embedded tree for {0} alignment.'.format(ttype),
                       [0, 1], xycoords='axes fraction', va='top',
                       xytext=[10, 0], textcoords='offset pixels')
            a.annotate('Total branch length is {0}'.format(t.total_branch_length()),
                       [1, 0], xycoords='axes fraction', ha='right',
                       xytext=[-10, 10], textcoords='offset pixels')

        datafile = cfg.dataPath('figs/gpm2/pt2_mus_cm_tree_embeddings_{0}_struct_{1}.ps'.format(run_id, idx))
        f.savefig(datafile, dpi=200, format='ps')

Beispiel #16

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def show_paired_v_energy(rname,rfid, all_muts, all_times, structs,rtype):
    '''Plot structure energy against mutation scores for the "focus" tree.

    The focus tree is the key of ``all_times`` whose mean
    ``'total_time_res'`` (taken over all of its structures) is largest.
    For that tree two scatter plots are drawn on figure 3 and saved via
    the module-level ``figfile`` template:

      1. ``'{rtype}_frac_double_{rname}'`` -- points coloured in green.
      2. ``'{rtype}_frac_silent_{rname}'`` -- points coloured in red by
         the ``'frac_paired'`` metric weighted by ``frac_good``.

    In both plots x is the structure energy, y is
    ``mean(frac_silent) * frac_good`` and the marker area is
    ``200 * frac_ug``.

    Depending on the module-level flags ``draw_single`` / ``draw_many``
    the selected structures are additionally rendered through
    ``show_rna_structs``.

    Parameters:
      rname, rtype   used to build the output file names and forwarded
                     to the helper routines.
      rfid           forwarded to the helper routines.
      all_muts       mapping tree -> mapping structure -> mutation data.
      all_times      mapping tree -> mapping structure -> timing data;
                     each timing entry must provide 'total_time_res'.
      structs        mapping with at least 'energies', 'structs', 'seq'.
                     Not modified; a truncated shallow copy is used.

    Returns:
      ``(idxs, focus_tree)`` where ``idxs`` is the result of
      ``get_interesting_inds`` for ``n = 4, selection_type = 'both'``;
      ``None`` when ``all_times`` is empty.
    '''
    if not all_times:
        return None

    # Mean resolved time per tree; the tree with the largest value is
    # the one that gets plotted.
    total_lens_res = [mean(list(it.chain(*[s_times['total_time_res']
                                           for s_times in t_times.values()])))
                      for t_times in all_times.values()]
    # list(...) so that the lookup also works where keys() is a view.
    focus_tree = list(all_times.keys())[argmax(total_lens_res)]

    muts = all_muts[focus_tree]
    times = all_times[focus_tree]

    # Only the first ns energies / structures belong to this tree.
    ns = len(muts)
    s2 = dict(structs)
    s2['energies'] = s2['energies'][:ns]
    s2['structs'] = s2['structs'][:ns]
    structs = s2
    energies = structs['energies']

    # Runtime caption text, kept exactly as in the original figures.
    caption_head = ('Evaluated structures positioned by energy \n'
                    'and a mutation based evolutionary score.\n')
    caption_tail = 'Radius indicates percentage of ungapped base pairs.'

    # ---- first figure: green colouring ---------------------------------
    f = myplots.fignum(3,figsize)
    # NOTE(review): this pass colours by 'frac_silent' although the caption
    # and the file name talk about double mutants -- confirm which metric
    # was intended.
    xvals, yvals, pfracs, ugfracs = _paired_v_energy_scores(
        muts, times, energies, 'frac_silent', False)
    colors = _paired_v_energy_unit_scale(pfracs)[:,newaxis] * [0,1,0]
    _paired_v_energy_scatter(
        f, xvals, yvals, ugfracs, colors,
        caption_head
        + 'Color indicates fractional frequency of double mutants.\n'
        + caption_tail)
    f.savefig(figfile.format('{1}_frac_double_{0}'.format(rname,rtype)))
    f.clear()

    # ---- second figure: red colouring by weighted 'frac_paired' --------
    f = myplots.fignum(3,figsize)
    xvals, yvals, pfracs, ugfracs = _paired_v_energy_scores(
        muts, times, energies, 'frac_paired', True)
    colors = _paired_v_energy_unit_scale(pfracs)[:,newaxis] * [1,0,0]
    _paired_v_energy_scatter(
        f, xvals, yvals, ugfracs, colors,
        caption_head
        + 'Color indicates a second score from paired BL.\n'
        + caption_tail)
    f.savefig(figfile.format('{1}_frac_silent_{0}'.format(rname,rtype)))

    # Everything below works on the values of the second pass.
    seq = structs['seq']
    n, selection_type = [4,'both']
    idxs = get_interesting_inds(xvals, yvals, structs, energies, pfracs,
                                rname, rtype, ns,rfid,figsize, colors,
                                seq,n,selection_type)

    if draw_single:
        show_rna_structs(xvals, yvals, structs, energies, pfracs,
                         rname, rtype, ns,rfid,figsize, colors,
                         seq,n,selection_type, idxs)
    if draw_many:
        for n, selection_type in \
                [[5,'ptime'],[5,'energy'],[ns,'energy']]:
            m_idxs = get_interesting_inds(xvals, yvals, structs, energies,
                                          pfracs, rname, rtype, ns,rfid,
                                          figsize, colors,
                                          seq,n,selection_type)
            show_rna_structs(xvals, yvals, structs, energies, pfracs,
                             rname, rtype, ns,rfid,figsize, colors,
                             seq,n,selection_type, m_idxs)

    return idxs,focus_tree


def _paired_v_energy_scores(muts, times, energies, color_metric, weight_color):
    '''Collect per-structure plot values for show_paired_v_energy.

    Returns four parallel lists ``(xvals, yvals, pfracs, ugfracs)``:
    energy, ``mean(frac_silent) * frac_good``, the mean of
    ``color_metric`` (multiplied by ``frac_good`` when ``weight_color``
    is true) and the first element of the 'frac_ug' metric.
    '''
    xvals, yvals, pfracs, ugfracs = [], [], [], []
    for i, (mvals, tvals) in enumerate(zip(muts.values(), times.values())):
        xvals.append(energies[i])

        # metric() is called in the same order as in the original loops.
        frac_ug = metric(mvals, tvals, 'frac_ug')[0]
        pfrac, _, frac_good = metric(mvals, tvals, color_metric)
        sfrac = metric(mvals, tvals, 'frac_silent')[0]

        color_score = mean(pfrac)
        if weight_color:
            color_score = color_score * frac_good

        ugfracs.append(frac_ug)
        pfracs.append(color_score)
        yvals.append(mean(sfrac) * frac_good)
    return xvals, yvals, pfracs, ugfracs


def _paired_v_energy_unit_scale(values):
    '''Rescale a 1-d sequence linearly onto [0, 1].

    When all values are equal the result is all zeros instead of the
    NaNs that a plain division by ``max - min`` would produce.
    '''
    scaled = array(values, dtype = float)
    lo = scaled.min()
    span = scaled.max() - lo
    if span == 0:
        return scaled - lo
    return (scaled - lo) / span


def _paired_v_energy_scatter(fig, xvals, yvals, ugfracs, colors, caption):
    '''Draw one energy-vs-score scatter with its caption on ``fig``.'''
    ax = fig.add_subplot(111)
    ax.scatter(xvals,yvals,array(ugfracs) * 200,
               color = colors)
    ax.set_ylabel('mutation score')
    ax.set_xlabel('free energy (-kCal)')
    ax.annotate(caption, [0,1], xycoords = 'axes fraction',
                xytext = [10,-10], textcoords='offset pixels',
                va = 'top')
    myplots.padded_limits(ax, xvals, yvals, .2)

Beispiel #17

0

Datei anzeigen

Datei: utils.py Projekt: bh0085/projects

def old_clusters():
    '''Plot cluster variances across a range of dendrogram cut heights.

    Reads the module-level names ``do_rnd``, ``HC`` and ``cvecs``.
    Draws on figure 6 (cleared first); nothing is returned and the
    figure is not saved here.

    Top panel: for every examined cut height (log scale) one point per
    cluster at ``variance / cluster size``, shaded by the size rank of
    the cluster.

    Bottom panel: between- and within-cluster variance against the
    number of clusters, together with the ratio
    ``(B / W) / ((k - 1) / (n - k))``; the cluster count maximising that
    ratio is marked in orange.  An alternative density-based criterion
    is kept behind the local ``density_based`` switch.
    '''
    plf = myplots.fignum(6, (8,8))
    plf.clear()
    ax = plf.add_subplot(211)

    # Smallest merge height that is taken into account.
    hstart = .15 if do_rnd else 3.

    all_vars = []
    all_clusters = []
    ks = []
    all_Bvars, all_Wvars = [], []

    # Heights above hstart, restricted to (roughly) the last len(cvecs)/2
    # entries and without the final one.  Floor division keeps the slice
    # bound an integer; '/' would yield a float under true division.
    tall_heights = HC.heights[greater(HC.heights,hstart)]
    theights = log(tall_heights[-len(cvecs) // 2:][:-1])

    for h in theights:
        clustering = HC.cut(exp(h))
        casrt = argsort(clustering)
        csrtd = clustering[casrt]
        # cluster label -> array of (member index, label) rows
        d = dict([(k,array(list(g))) for k, g in it.groupby(zip(casrt,csrtd),
                                            key = lambda x: x[1])])
        # Row indices into cvecs for every cluster, in dict order.
        members = [rows[:,0] for rows in d.values()]
        lens = array([len(m) for m in members],float)

        cmeans = array([mean(cvecs[m,:],0) for m in members])
        Wvars = np.sum(array([np.mean(  (cvecs[m,:] - cmeans[i])  **2 )
                              for i, m in enumerate(members)]))
        Bvars = np.sum(lens[:,newaxis]* ( (cmeans - mean(cvecs,0)) **2)  )

        ks.append(len(d))
        all_Wvars.append(Wvars)
        all_Bvars.append(Bvars)

        # Summed per-dimension variance of each cluster, raw and divided
        # by the cluster size.
        cluster_vars = array([ sum(var(cvecs[m,:],0)) for m in members ])
        cluster_vars_n = cluster_vars / lens

        all_clusters.append([cvecs[m] for m in members])
        all_vars.append(cluster_vars)

        # Rank of each cluster by size, scaled to [0, 1), as green shade.
        colors = array(argsort(argsort(lens)),float)/len(lens)
        ax.scatter(0*(cluster_vars) +  h , cluster_vars_n, 20,
                   color = array(array([0.,1.,0.]) * colors[:,newaxis]))

    ax2 = plf.add_subplot(212)
    all_Bvars = array(all_Bvars)
    all_Wvars = array(all_Wvars)

    # Manual switch between the two selection criteria.
    density_based = False
    if density_based:
        # Coalescence rate of clusters: density of cut heights divided
        # by the number of clusters at each height.
        from scipy.stats import gaussian_kde

        xs = theights
        density = gaussian_kde(theights)
        density_rate = 1. / array([len(v) for v in all_vars])

        # Fixed bandwidth; relies on a private scipy method to apply it.
        density.covariance_factor = lambda : .25
        density._compute_covariance()

        yvals = array([density(xs),
                       density(xs) * density_rate,
                       [sum(v/[len(c) for c in cs] )
                        for v,cs in zip(all_vars,all_clusters) ]])
        # Normalise every curve to a maximum of 1.
        yvals /= np.max(yvals,1)[:,newaxis]
        xvals = (xs) + zeros(len(yvals))[:,newaxis]

        diff = yvals[1] - yvals[2]
        ax2.plot(xvals[0], diff, linewidth = 10)
        ax2.plot(xvals[0], diff, linewidth = 5, color = 'orange')

        mpt = argmin(diff)
        ax2.scatter([xvals[0][mpt]],diff[mpt], 200, color = 'red')

        ax2.plot(xvals[0],yvals[1])
    else:
        ax2.plot(ks,all_Bvars)
        ax2.plot(ks,all_Wvars)

        # Ratio of between to within variance, corrected for the number
        # of clusters k and the number of vectors n.
        hcfun =  array(all_Bvars) / array(all_Wvars) /\
            ((array(ks,float) -1) / array(float(len(cvecs)) - array(ks,float)))
        # Divisions by zero (k == 1 or k == n) give nan/inf; make finite.
        hcfun = nan_to_num(hcfun)

        mpt = argmax(hcfun)

        # Overlay axes without frame for the ratio curve.
        a3 = plf.add_subplot(212, frameon = False)
        a3.plot(ks, hcfun)

        ax2.scatter([ks[mpt]]*2, [all_Bvars[mpt],all_Wvars[mpt]], 200, color = 'orange')

Beispiel #18

0

Datei anzeigen

def get_seq_groups(rfid = 'RF00167', reset = True, tree = True,
        draw_distances = draw_all_easy,
        draw_clusters = draw_all_easy,
        draw_single_cluster = draw_all_hard):
    '''
Cluster the sequences of an rfam family and return them by cluster.

1) Fetch alignment, tree and infos for ``rfid`` via rfam.get_fam.
2) Get pairwise distances through mem.getOrSet(setDistances, ...).
3) Cut the distances into clusters with
   maxclust_dists(dists, k = 5, method = 'complete').
4) Return, for every cluster, the list of its alignment entries.

   kwds:
     rfid                 ['RF00167']  Family to load.
     reset                [True]  Forwarded to mem.getOrSet.
     tree                 [True]  NOTE(review): overwritten by the tree
                                  returned from rfam.get_fam before it
                                  is read, so the value passed in has
                                  no effect.  Confirm the intent.
     draw_distances       If true, save a scatter plot of tree based
                          vs. levenshtein distances.
     draw_clusters        If true, save a scatter plot of the clusters
                          in the first two principal components.
     draw_single_cluster  Accepted but not used in this function.

   returns:
     A list with one entry per cluster; each entry is the list of
     ``ali`` elements belonging to that cluster.
'''
    ali, tree, infos = rfam.get_fam(rfid)
    n = len(ali)

    if draw_distances:
        dists_t = seq_dists(ali,rfid, tree = True)
        dists_l = seq_dists(ali,rfid, tree = False)
        dtf = dists_t.flatten()
        dlf = dists_l.flatten()
        lin = linregress(dtf, dlf)
        rsquared = lin[2]**2

        f = myplots.fignum(5, (7,7))
        # Figure 5 is shared with the cluster plot below; other callers
        # clear explicitly after fignum, so do the same here to avoid
        # drawing on top of stale content.
        f.clear()
        ax = f.add_subplot(111)
        ax.annotate('Levenshtein distance vs. BioNJ branch lengths',
                    [0,1], xycoords = 'axes fraction', va = 'top',
                    xytext = [10,-10],textcoords = 'offset pixels')
        ax.annotate('R-Squared: {0}'.format(rsquared),
                    [1,0], xycoords = 'axes fraction', ha = 'right',
                    xytext = [-10,10],textcoords = 'offset pixels')
        ax.set_xlabel('BIONJ Tree ML Distance')
        ax.set_ylabel('Levenshtein Distance')

        ax.scatter(dtf, dlf, 100)

        datafile = cfg.dataPath('figs/gpm2/pt2_lev_tree_dists.tiff')
        f.savefig(datafile)

    dists = mem.getOrSet(setDistances, ali = ali, tree = tree, run_id = rfid,
                         register = rfid,
                         on_fail = 'compute',
                         reset = reset)

    clusters = maxclust_dists(dists, k = 5, method = 'complete')
    # Shift the labels down by one so they can index the colour table.
    clusters -= 1

    if draw_clusters:

        ct = mycolors.getct(len(set(clusters)))
        colors = [ct[elt] for elt in clusters]
        pca_vecs = mlab.PCA(dists).project(dists)

        f = myplots.fignum(5, (8,8))
        # Same figure number as the distance plot: start from a clean one.
        f.clear()
        ax = f.add_subplot(111)
        ax.annotate('Rfam sequence clusters in first 2 PC of sequence space.',
                    [0,1], xycoords = 'axes fraction', va = 'top',
                    xytext = [10,-10],textcoords = 'offset pixels')
        ax.annotate('Number of Clusters: {0}'.format(len(ct)),
                    [1,0], xycoords = 'axes fraction', ha = 'right',
                    xytext = [-10,10],textcoords = 'offset pixels')
        ax.set_xlabel('PC 1')
        ax.set_ylabel('PC 2')

        ax.scatter(pca_vecs[:,0],pca_vecs[:,1], 20, color = colors)

        datafile = cfg.dataPath('figs/gpm2/pt2_all_seqs_clustered.ps')
        f.savefig(datafile)

    # Group (sequence index, cluster label) pairs by label.
    cgrps = dict([ (k, list(g))
              for k , g  in it.groupby(\
                sorted( list(enumerate(clusters)),key = lambda x: x[1]),
                key = lambda x: x[1])])
    # Materialise once: dict views cannot be indexed on Python 3.
    groups = list(cgrps.values())

    # The largest cluster; only needed by the disabled plot below.
    cbig = argmax([len(x) for x in groups])
    cluster_seqs = [ elt[0] for elt in groups[cbig] ]
    csize = len(cluster_seqs)

    # Disabled diagnostic plot for the largest cluster.
    if 0:

        ct = mycolors.getct(2)
        pca_vecs = mlab.PCA(dists).project(dists)
        colors =[ct[1] if elt in cluster_seqs else ct[0] for elt in range(len(pca_vecs))]

        f = myplots.fignum(5, (8,8))
        ax = f.add_subplot(111)
        ax.annotate('Inter and intra cluster distances vs. PC0 component for chosen cluster.',
                    [0,1], xycoords = 'axes fraction', va = 'top',
                    xytext = [10,-10],textcoords = 'offset pixels')
        # The original format string had no placeholder for the second
        # argument, so n - csize was silently dropped.
        ax.annotate('Number of cluster sequences: {0}, Number of other sequences: {1}'.format(csize, n  - csize),
                    [1,0], xycoords = 'axes fraction', ha = 'right',
                    xytext = [-10,10],textcoords = 'offset pixels')
        ax.set_xlabel('PC 0')
        ax.set_ylabel('Distance')


        for s in cluster_seqs:
            ax.scatter(pca_vecs[:,0],dists[s,:] ,200 *exp(-(dists[s,:] / .5) **2),  color = colors, alpha = .2)

        datafile = cfg.dataPath('figs/gpm2/pt2_focused_cluster_dists.ps')
        f.savefig(datafile)

    clusters_final  = [ [ elt[0] for elt in grp ] for grp in groups ]
    seqs_final = [ [ ali[idx] for idx in clust ] for clust in clusters_final]
    return seqs_final

Beispiel #19

0

Datei anzeigen

Datei: score_utils.py Projekt: bh0085/projects

def show_paired_v_energy(rname, rfid, all_muts, all_times, structs, rtype):
    '''Plot structure energy against mutation scores for the "focus" tree.

    The focus tree is the key of ``all_times`` whose mean
    ``'total_time_res'`` (taken over all of its structures) is largest.
    For that tree two scatter plots are drawn on figure 3 and saved via
    the module-level ``figfile`` template:

      1. ``'{rtype}_frac_double_{rname}'`` -- points coloured in green.
      2. ``'{rtype}_frac_silent_{rname}'`` -- points coloured in red by
         the ``'frac_paired'`` metric weighted by ``frac_good``.

    In both plots x is the structure energy, y is
    ``mean(frac_silent) * frac_good`` and the marker area is
    ``200 * frac_ug``.

    Depending on the module-level flags ``draw_single`` / ``draw_many``
    the selected structures are additionally rendered through
    ``show_rna_structs``.

    Parameters:
      rname, rtype   used to build the output file names and forwarded
                     to the helper routines.
      rfid           forwarded to the helper routines.
      all_muts       mapping tree -> mapping structure -> mutation data.
      all_times      mapping tree -> mapping structure -> timing data;
                     each timing entry must provide 'total_time_res'.
      structs        mapping with at least 'energies', 'structs', 'seq'.
                     Not modified; a truncated shallow copy is used.

    Returns:
      ``(idxs, focus_tree)`` where ``idxs`` is the result of
      ``get_interesting_inds`` for ``n = 4, selection_type = 'both'``;
      ``None`` when ``all_times`` is empty.
    '''
    if not all_times:
        return None

    # Mean resolved time per tree; the tree with the largest value is
    # the one that gets plotted.
    total_lens_res = [
        mean(list(it.chain(*[s_times['total_time_res']
                             for s_times in t_times.values()])))
        for t_times in all_times.values()
    ]
    # list(...) so that the lookup also works where keys() is a view.
    focus_tree = list(all_times.keys())[argmax(total_lens_res)]

    muts = all_muts[focus_tree]
    times = all_times[focus_tree]

    # Only the first ns energies / structures belong to this tree.
    ns = len(muts)
    s2 = dict(structs)
    s2['energies'] = s2['energies'][:ns]
    s2['structs'] = s2['structs'][:ns]
    structs = s2
    energies = structs['energies']

    # Runtime caption text, kept exactly as in the original figures.
    caption_head = ('Evaluated structures positioned by energy \n'
                    'and a mutation based evolutionary score.\n')
    caption_tail = 'Radius indicates percentage of ungapped base pairs.'

    # ---- first figure: green colouring ---------------------------------
    f = myplots.fignum(3, figsize)
    # NOTE(review): this pass colours by 'frac_silent' although the caption
    # and the file name talk about double mutants -- confirm which metric
    # was intended.
    xvals, yvals, pfracs, ugfracs = _paired_v_energy_scores(
        muts, times, energies, 'frac_silent', False)
    colors = _paired_v_energy_unit_scale(pfracs)[:, newaxis] * [0, 1, 0]
    _paired_v_energy_scatter(
        f, xvals, yvals, ugfracs, colors,
        caption_head
        + 'Color indicates fractional frequency of double mutants.\n'
        + caption_tail)
    f.savefig(figfile.format('{1}_frac_double_{0}'.format(rname, rtype)))
    f.clear()

    # ---- second figure: red colouring by weighted 'frac_paired' --------
    f = myplots.fignum(3, figsize)
    xvals, yvals, pfracs, ugfracs = _paired_v_energy_scores(
        muts, times, energies, 'frac_paired', True)
    colors = _paired_v_energy_unit_scale(pfracs)[:, newaxis] * [1, 0, 0]
    _paired_v_energy_scatter(
        f, xvals, yvals, ugfracs, colors,
        caption_head
        + 'Color indicates a second score from paired BL.\n'
        + caption_tail)
    f.savefig(figfile.format('{1}_frac_silent_{0}'.format(rname, rtype)))

    # Everything below works on the values of the second pass.
    seq = structs['seq']
    n, selection_type = [4, 'both']
    idxs = get_interesting_inds(xvals, yvals, structs, energies, pfracs, rname,
                                rtype, ns, rfid, figsize, colors, seq, n,
                                selection_type)

    if draw_single:
        show_rna_structs(xvals, yvals, structs, energies, pfracs, rname, rtype,
                         ns, rfid, figsize, colors, seq, n, selection_type,
                         idxs)
    if draw_many:
        for n, selection_type in \
                [[5,'ptime'],[5,'energy'],[ns,'energy']]:
            m_idxs = get_interesting_inds(xvals, yvals, structs, energies,
                                          pfracs, rname, rtype, ns, rfid,
                                          figsize, colors, seq, n,
                                          selection_type)
            show_rna_structs(xvals, yvals, structs, energies, pfracs, rname,
                             rtype, ns, rfid, figsize, colors, seq, n,
                             selection_type, m_idxs)

    return idxs, focus_tree


def _paired_v_energy_scores(muts, times, energies, color_metric, weight_color):
    '''Collect per-structure plot values for show_paired_v_energy.

    Returns four parallel lists ``(xvals, yvals, pfracs, ugfracs)``:
    energy, ``mean(frac_silent) * frac_good``, the mean of
    ``color_metric`` (multiplied by ``frac_good`` when ``weight_color``
    is true) and the first element of the 'frac_ug' metric.
    '''
    xvals, yvals, pfracs, ugfracs = [], [], [], []
    for i, (mvals, tvals) in enumerate(zip(muts.values(), times.values())):
        xvals.append(energies[i])

        # metric() is called in the same order as in the original loops.
        frac_ug = metric(mvals, tvals, 'frac_ug')[0]
        pfrac, _, frac_good = metric(mvals, tvals, color_metric)
        sfrac = metric(mvals, tvals, 'frac_silent')[0]

        color_score = mean(pfrac)
        if weight_color:
            color_score = color_score * frac_good

        ugfracs.append(frac_ug)
        pfracs.append(color_score)
        yvals.append(mean(sfrac) * frac_good)
    return xvals, yvals, pfracs, ugfracs


def _paired_v_energy_unit_scale(values):
    '''Rescale a 1-d sequence linearly onto [0, 1].

    When all values are equal the result is all zeros instead of the
    NaNs that a plain division by ``max - min`` would produce.
    '''
    scaled = array(values, dtype=float)
    lo = scaled.min()
    span = scaled.max() - lo
    if span == 0:
        return scaled - lo
    return (scaled - lo) / span


def _paired_v_energy_scatter(fig, xvals, yvals, ugfracs, colors, caption):
    '''Draw one energy-vs-score scatter with its caption on ``fig``.'''
    ax = fig.add_subplot(111)
    ax.scatter(xvals, yvals, array(ugfracs) * 200, color=colors)
    ax.set_ylabel('mutation score')
    ax.set_xlabel('free energy (-kCal)')
    ax.annotate(caption, [0, 1],
                xycoords='axes fraction',
                xytext=[10, -10],
                textcoords='offset pixels',
                va='top')
    myplots.padded_limits(ax, xvals, yvals, .2)