예제 #1
0
파일: view.py 프로젝트: bh0085/compbio
def view_in():
    """Plot the sorted in-degree profile of the network affinity matrix.

    The matrix comes from ``nu.net_affinity()``. It is reduced with
    ``sum(na, 0)``, reordered by ``argsort`` of that reduction, and drawn
    with ``sm.seismic`` on a freshly cleared figure 0, which is then titled.
    Nothing is returned.
    """
    affinity = nu.net_affinity()

    # Always draw into figure 0, discarding whatever it held before.
    fig = plt.figure(0)
    fig.clear()
    axes = fig.add_subplot(111)

    in_degree = sum(affinity, 0)
    ordering = argsort(in_degree)
    # seismic is handed a list of series; here there is exactly one.
    sm.seismic([in_degree[ordering]], ax=axes)

    myplots.maketitle(axes, 'In degree, sorted')
예제 #2
0
def tree_similarity(dist1, dist2, run_id, criterion='knn', k=6):
    """Compare two distance matrices through their k-nearest-neighbour sets.

    For every row the ``k`` nearest columns are taken from each matrix. The
    union of the two neighbour sets is reduced to the ``k`` pairs with the
    smallest distance in either matrix, and those pairs are plotted:

    * left axes: per-row absolute difference between the two distances,
      drawn with ``seismic.seismic``;
    * right axes: scatter of ``dist1`` distances (x) against ``dist2``
      distances (y), annotated with the R-squared of a linear fit over all
      pairs and the mean Jaccard index of the neighbour sets.

    The figure is saved under
    ``cfg.dataPath('figs/gpm2/pt2_mus_cm_tree_dists_{run_id}_k{k}.tiff')``.

    Args:
        dist1: 2-D array indexed as ``dist1[i, n]``; labelled "Muscle
            aligned" on the plot.  Assumed to be a numpy array -- TODO
            confirm against callers.
        dist2: 2-D array with the same layout; labelled "Struct aligned".
        run_id: identifier interpolated into the output file name.
        criterion: comparison method; only ``'knn'`` is implemented.
        k: number of neighbours taken per row.

    Raises:
        ValueError: if ``criterion`` is not ``'knn'``.  (Previously an
            unknown criterion was silently ignored and nothing was plotted.)
    """
    if criterion != 'knn':
        raise ValueError(
            "unsupported criterion {0!r}; only 'knn' is implemented".format(
                criterion))

    nq = len(dist1)

    # Column 0 of every argsort row is skipped -- presumably the query point
    # itself at distance zero (TODO confirm).
    nb1 = argsort(dist1, 1)[:, 1:k + 1]
    nb2 = argsort(dist2, 1)[:, 1:k + 1]
    all_nbs = [set(n1).union(set(n2)) for n1, n2 in zip(nb1, nb2)]
    nb_intersection = [set(n1).intersection(set(n2))
                       for n1, n2 in zip(nb1, nb2)]

    # For each row: (dist1, dist2) for every neighbour in the union, then
    # keep the k pairs whose smaller distance is lowest.  Each union holds at
    # least k members, so the result stacks into an (nq, k, 2) array.
    nb_dists = [array([[dist1[i, n], dist2[i, n]] for n in nbs])
                for i, nbs in enumerate(all_nbs)]
    nb_dists = array([sorted(nbd, key=min)[:k] for nbd in nb_dists])

    abs_diffs = [abs(diff(elt, 1).flatten()) for elt in nb_dists]

    ct = mycolors.getct(nq)
    f = myplots.fignum(4, (10, 8))
    ax = f.add_axes([.05, .08, .25, .87])
    seismic.seismic(abs_diffs, ax=ax, colors=ct)

    # Mean over rows of |intersection| / |union| of the two neighbour sets.
    jaccard = mean([float(len(shared)) / float(len(union))
                    for shared, union in zip(nb_intersection, all_nbs)])

    ax2 = f.add_axes([.34, .08, .6, .87])
    for i, d in enumerate(nb_dists):
        ax2.scatter(d[:, 0], d[:, 1], 20, alpha=.5, color=ct[i])

    # linregress returns the correlation coefficient r as its third element.
    lin = linregress(nb_dists[:, :, 0].flatten(), nb_dists[:, :, 1].flatten())
    rsquared = lin[2] ** 2

    ax2.annotate('NN dists for multi/struct-aligned trees.\nK = {0}'.format(k),
                 [0, 1], xycoords='axes fraction', va='top',
                 xytext=[10, -10], textcoords='offset pixels')
    ax2.annotate('R-Squared: {0:3.3}\nJaccard Index: {1:3.3}'.format(rsquared, jaccard),
                 [1, 0], xycoords='axes fraction', ha='right',
                 xytext=[-10, 10], textcoords='offset pixels')
    ax2.set_xlabel('Muscle aligned tree distances')
    ax2.set_ylabel('Struct aligned tree distances')

    datafile = cfg.dataPath('figs/gpm2/pt2_mus_cm_tree_dists_{0}_k{1}.tiff'.format(run_id, k))
    f.savefig(datafile)
예제 #3
0
파일: exp.py 프로젝트: bh0085/compbio
def cluster_exprs(all_members, ct_data,
                  do_plot = False,
                  cluster_type = '4d',
                  cluster_id = 4):
  """Export per-group, per-time expression values for one clustering.

  Rows of ``ct_data`` are grouped by the labels in
  ``all_members[cluster_id]``.  For every group and every distinct value of
  column 1 of its rows (called ``time`` below), the function looks up the
  values of every record returned by ``nio.getBDTNP()`` at those rows,
  scatter-plots the matching ``x``/``z`` and ``y``/``z`` values from
  ``nio.getBDTNP(misc = True)``, and writes a ``.mat`` file plus a figure
  file under ``cfg.dataPath('soheil/...')``.

  Args:
    all_members: indexable by ``cluster_id``; each entry is a label array
      with one label per row of ``ct_data`` (it is compared with ``equal``
      and used as a boolean row mask).
    ct_data: 2-D array; column 1 is compared against ``range(6)`` and used
      as the ``time`` key.  Rows are used as index tuples into the ``vals``
      arrays -- assumes integer entries, TODO confirm.
    do_plot: if true, also draw a stacked count plot and a histogram of
      group sizes on figure 1 (that figure is cleared again inside the
      export loop).
    cluster_type: label interpolated into output file names only.
    cluster_id: index into ``all_members``; also used in file names.

  Raises:
    Exception: unconditionally at the end (see the note on the last line).
  """
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)

  labels = all_members[cluster_id]

  # One entry per distinct label, holding the rows of ct_data with that label.
  tissues = dict([('t_{0}'.format(i), dict(cts = ct_data[equal(labels, elt)]))
                  for i, elt in enumerate(set(list(labels)))])

  # counts[g, t]: number of rows in group g whose column 1 equals t.
  nt = 6
  counts = array([[sum(equal(v['cts'][:,1], t))
                   for t in range(nt)]
                  for v in tissues.values()])

  if do_plot:
    f = plt.figure(1)
    f.clear()

    ax1 = f.add_subplot(121)
    ax2 = f.add_subplot(122)
    seismic.seismic(counts, ax = ax1, stacked = True, colors = mycolors.getct(len(counts)))
    ax2.hist(np.sum(counts,1))

  all_exprs = {}
  for t, v in tissues.iteritems():
    ct_all = v['cts']

    for time in set([row[1] for row in ct_all]):
      ct = [row for row in ct_all if row[1] == time]

      # Transpose the selected rows into one index sequence per axis.  The
      # explicit tuple keeps this a multidimensional index on numpy versions
      # that reject a bare list of sequences, and under a lazy zip().
      idx = tuple(zip(*ct))

      exprs = dict([(k, elt['vals'][idx]) for k, elt in mrnas.iteritems()])
      ys = misc['y']['vals'][idx]
      zs = misc['z']['vals'][idx]
      xs = misc['x']['vals'][idx]

      f = plt.figure(1)
      f.clear()
      ax1 = f.add_subplot(121, title = 'X-Z axis view for tissue {0}'.\
                            format(t))
      ax2 = f.add_subplot(122, title = 'Y-Z axis view for tissue {0}'.\
                            format(t))
      ax1.scatter(xs, zs)
      ax2.scatter(ys, zs)

      v['exprs'] = exprs
      all_exprs['tiss_{0}_time_{1}'.format(t,time)] = exprs

      # Binary mode plus a context manager: the handles used to be opened in
      # text mode and never closed.
      mat_path = cfg.dataPath('soheil/expression_c{0}_n{1}_tissue{2}_time{3}.mat'.\
                                format(cluster_type,cluster_id,t,time))
      with open(mat_path, 'wb') as mat_file:
        sio.savemat(mat_file, exprs)

      # NOTE(review): savefig receives a file object and no format argument,
      # so the image format presumably comes from matplotlib's default rather
      # than the '.tiff' suffix -- confirm before relying on these files.
      fig_path = cfg.dataPath('soheil/expression_c{0}_n{1}_tissue{2}_time{3}.tiff'.\
                                format(cluster_type,cluster_id,t,time))
      with open(fig_path, 'wb') as fig_file:
        f.savefig(fig_file)

  # NOTE(review): all_exprs[k] is the per-record dict built above, so each
  # `sub` is a value array and `sub[k]` indexes it with the tissue/time key.
  # This looks unintended; left unchanged until the intent is confirmed.
  exprs_out = dict([( k, [ mean(sub[k]) for sub in all_exprs[k].values() ])
                    for k in all_exprs.keys() ])

  out_path = cfg.dataPath('soheil/expression_c{0}_n{1}_intercluster.mat'.\
                            format(cluster_type,cluster_id))
  with open(out_path, 'wb') as out_file:
    sio.savemat(out_file, exprs_out)

  # NOTE(review): unconditional raise, presumably a leftover debugging stop.
  # Kept so callers observe the same outcome; remove once confirmed.
  raise Exception()
예제 #4
0
파일: __init__.py 프로젝트: bh0085/compbio
def sig_grid(num = 1 ,  method = 'tree', reset = False,
             plot_kcs = True,
             bp_means = False,
             bp_zeros = True, zero_ofs = 1e-6,
             bp_logs = True,
             show_kos = False,
             filter_rows_and_cols = False):
  """Draw heatmap, boxplot and summary figures of a significance grid.

  NOTE(review): the body reads ``exps``, ``grid``, ``knockout_cells``,
  ``knockout_vals`` and ``knockout_tfs`` without defining them; they
  presumably live at module scope or were set up by code not shown here --
  confirm.  From their use below: ``exps`` is a dict whose values are index
  arrays into the columns of ``grid``; ``grid`` is at least 2-D;
  ``knockout_cells`` holds index pairs into ``grid`` whose second entry is
  matched against the ``exps`` values.

  Figures produced:
    * figure 1: one heatmap per entry of ``exps`` with the per-column mean
      overlaid; saved as ``daniel/figs/{method}_net{num}_heatmaps.tiff``.
    * figure 3: boxplot of values per entry of ``exps`` plus the knockout
      values; saved as ``daniel/figs/<subdir>{method}_net{num}_boxplots.tiff``
      and then cleared and reused for a histogram plot.
    * figure 4: percentage and mean of entries above ``log(zero_ofs)``.

  Args:
    num: network number; used only in the title and output file names.
    method: learning method name; used only in the title and file names.
    reset: not referenced in this body.
    plot_kcs: if true, mark the knockout cells on each heatmap.
    bp_means: if true, the boxplot data are the per-column means instead of
      every cell.
    bp_zeros: if false, zero entries are dropped from the boxplot data.
    zero_ofs: offset added before taking logs; ``log(zero_ofs)`` is also the
      threshold used for "nonzero" in figures 3 and 4.
    bp_logs: if true, the boxplot data are log-transformed.
    show_kos: not referenced in this body.
    filter_rows_and_cols: only influences the boxplot output subdirectory.

  Side effects: mutates ``grid`` in place, prints the boxplot path, may
  create the output directory and deletes an existing boxplot file.
  Returns None.
  """



  #Make and annotate the heatmap figure
  f = plt.figure(1, facecolor = 'w')
  f.clear()
  axdims= .9
  ax_box = array([.05,.05,axdims,axdims])
  sg_big_hm_annotations(f, ax_box)

  #Set up the sizes of each group axis in the heatmap figure
  # kwts: total number of columns over all groups; msize: width of the gap
  # left between neighbouring groups, in the same units.
  kwts = float(sum([len(v) for  v in exps.values()]))
  mwidth = .015
  msize = mwidth*kwts
  kw_total = kwts +  ( msize * (len(exps)-1))
  ofs = 0


  # Zero the knockout cells in grid (in place) unless explicitly allowed.
  allow_tf_kn = False
  if not allow_tf_kn: grid[zip(*knockout_cells)] = 0

  #Some more heatmap configuration.
  # Colour limits: 10th and 90th percentile of the strictly positive entries.
  saturation = [np.percentile(grid[nonzero(greater(grid,0))],10),
                np.percentile(grid[nonzero(greater(grid,0))],90)]
  # Row order shared by every heatmap: rows sorted by their mean.
  tf_srt = argsort(np.mean(grid,1))
  all_bps = []
  # Largest per-column mean over all groups; common y-limit of the overlays.
  expsums = [np.mean( grid.T[v,:], 1) for v in exps.values()]
  max_sum = np.max((list(it.chain(*expsums))))

  #For each experiment class, plot a heatmap and overlay per exp sums
  for k , v in exps.iteritems():
    #Axes positioning
    # [left, bottom, width, height] as fractions of the group strip, then
    # scaled by axdims and shifted by the .05 margin.
    wid = len(v)
    ax_ofs =  array([ofs/kw_total, 0, (wid) / kw_total,1.])
    ax_box = array([.05,.05,0.,0.])
    ax_ofs = (ax_ofs * axdims) + ax_box

    #Make heatmap axes.
    ax = f.add_axes(ax_ofs, frameon = False)
    sums = np.mean(grid.T[v,:],1)
    # Columns of this group in descending order of their mean.
    exp_srt = argsort(sums)[::-1]
    hm.heatMap( grid.T[v[exp_srt],:][:,tf_srt], axes = ax,
                vmin = saturation[0],
                vmax = saturation[1])

    #Make overlay axes.
    # NOTE(review): `axisbg` is the older matplotlib spelling of `facecolor`.
    ax2 = f.add_axes(array(ax_ofs) +  array([0,0,0,0]),
                     frameon = True,
                     axisbg = 'none',
                     xticks = [],
                     yticks = [])
    
    #Make the axes look the way I like em
    for a in ax2.spines.values():
      a.set_linewidth(2)
      a.set_alpha(.5)
    # Knockout cells whose second index falls inside this group.
    these_knockouts = nonzero([c [1]in v for c in knockout_cells])
    kc = knockout_cells[these_knockouts]
    kv = knockout_vals[these_knockouts]
    
    #If plot kcs is selected, plot the cells corresponding to TF deletion/OE
    if plot_kcs:  
      if len(kc) > 0:
        ax.scatter(*zip(*[( list(v).index(x[1]),x[0]) for x in kc]), s =50, 
                  color = 'none', edgecolor = 'black', linewidth = 3)
    color = 'blue'
    ax2.plot(sums[exp_srt],
            linewidth = 4, color = color)

    # Collect this group's boxplot data: means or all cells, optionally
    # without zeros.
    if bp_means: bpelts = sums
    else: bpelts = grid.T[v,:].flatten()
    if not( bp_zeros ): bpelts = bpelts[nonzero(bpelts)]
    all_bps.append(bpelts)

    ax2.set_xlim([0,wid])
    ax2.set_ylim([0,max_sum])
    ax.set_xlim([0,wid])
    ax.set_ylim([0,shape(grid)[0]])
    ax2.set_xticks([])

    #Annotate each axis
    # NOTE(review): tbb is assigned but never used afterwards.
    tbb = matplotlib.transforms.Bbox(ax2.bbox).translated(0,-20)
    t = ax2.text(-2,0, k, 
                 va = 'bottom', ha = 'right',
                 rotation = 90, color = 'black',
                 size = 'x-large', family = 'serif')
    # Advance past this group and the gap that follows it.
    ofs +=  wid + msize


  #Make the boxplot figure
  f2 = plt.figure(3)
  plt.clf()

  # Knockout box: either the mean over grid rows for each distinct knockout
  # column (groupby yields (key, group) pairs, so g[0] is the column index),
  # or the raw knockout values.
  if bp_means:  bp_kos =  array([  mean(grid.T[g[0],:],0) 
                             for g in it.groupby(sorted(\
        [ko[1] for ko in knockout_cells]))
                             ])
  else: bp_kos = array(knockout_vals)
  if not bp_zeros: bp_kos = bp_kos[nonzero(bp_kos)]

  all_bps = all_bps +  [bp_kos]

  ax3 = f2.add_subplot('111')
  if bp_logs: all_bps = [log(b + zero_ofs) for b in all_bps]
  # Value that an exact zero maps to after the log transform.
  bp_lzero = log(zero_ofs)

  boxplots = ax3.boxplot([bp for bp in all_bps], widths= .5)
  for p in boxplots.values():
      for e in p: e.set_linewidth(4)    

  #Annotate the boxplot figure
  # NOTE(review): hard-codes 8 boxes, i.e. presumably 7 entries in exps plus
  # the knockout box -- confirm; any other size mislabels or raises.
  ann_str = ''
  for i in range(8):
    ann_str += '{0}: {1}\n'.format(i+1, (exps.keys() + ['TF Knockout/OE'])[i])
  ax3.annotate(ann_str, [0,1],xycoords = 'axes fraction',
               xytext = [10,-10], textcoords = 'offset pixels',
               va = 'top', ha = 'left')
  ax3.set_title('''Boxplot of significances per experiment type for {3} learning method, Net {4} 

Filtered out were {0} cells corresponding to {1} TFs Knocked out or OverExpressed.
{2} of these cells have nonzero importance and are plotted at x=9,

Showing Means: {5}, Showing zeros: {6}, Plotting logs {7}'''.\
                  format(len(knockout_cells), len(knockout_tfs),
                         len(nonzero(knockout_vals)[0]), 
                         method, num,
                         bp_means, bp_zeros, bp_logs))
  ax3.set_ylabel('significance')
  ax3.set_xlabel('experiment class')
  
  f.savefig(cfg.dataPath('daniel/figs/{0}_net{1}_heatmaps.tiff'.format(method, num)),
            format = 'tiff')

    
  # Pick the output subdirectory from the flag combination.
  # NOTE(review): no branch matches bp_zeros and bp_logs both true (the
  # default flags included), so the chain evaluates to False and the text
  # 'False' ends up prefixed to the file name -- confirm whether a
  # 'zeros_*_log/' case is missing.
  plam = lambda: filter_rows_and_cols and 'nonzero_exps_and_tfs_cells_log/'\
      or bp_zeros and not bp_logs and bp_means and 'zeros_means_nolog/'\
      or not bp_zeros and bp_means and not bp_logs and 'nozeros_means_nolog/'\
      or not bp_zeros and bp_means and bp_logs and 'nozeros_means_log/'\
      or bp_zeros and not bp_means and not bp_logs and 'zeros_cells_nolog/'\
      or not bp_zeros and not bp_means and not bp_logs and 'nozeros_cells_nolog/'\
      or not bp_zeros and not bp_means and bp_logs and 'nozeros_cells_log/'

  # Despite its name, dataDir is the full path of the boxplot file.
  dataDir = cfg.dataPath('daniel/figs/{2}{0}_net{1}_boxplots.tiff'.\
                              format(method, num,plam()))
  print 'saving {0}'.format(dataDir)
  if not os.path.isdir(os.path.dirname(dataDir)): os.mkdir(os.path.dirname(dataDir))
  if os.path.isfile(dataDir): os.remove(dataDir)
  f2.savefig(dataDir,    format = 'tiff')
  
  
  

  # Per-box mean of the entries above the log-zero level, and 50-bin
  # histograms of every box over the fixed range [-15, 8].
  mean_xvals = [ mean(all_bps[i][nonzero(greater(all_bps[i],bp_lzero))]) for i in range(len(all_bps))]
  pdfs, xvals = zip(*[histogram(x, bins=50, range=[-15,8], normed=False) for x in all_bps])
  import compbio.utils.colors as colors
  c = colors.getct(len(pdfs))
  # Same figure number as the boxplot above: it has already been saved, so
  # it is cleared and reused for the histogram plot.
  f3 = plt.figure(3)
  f3.clear()
                                   
  sax = f3.add_subplot('111')
  # Each histogram is normalised to sum to one before plotting.
  seismic.seismic([array(x,float)/ sum(x) for x in pdfs], xax = xvals[0][:-1],stacked = False, colors = c, xmarkpts = mean_xvals, ax = sax)
  

  f4 = plt.figure(4)
  f4.clear()
  ax = f4.add_subplot('121')
  ax.set_title('(log base 10) of Percentage Nonzero for Experiment Classes')
  # log10 of the percentage of entries above the log-zero level, per box.
  percs = log10(array([100*float(len(nonzero(greater(x,bp_lzero))[0])) / len(x) for x in all_bps]))
  ax.plot(percs,linewidth = 6)
  ax.set_yticks(percs)

  names = exps.keys() + ['TF Knockout/OE']
  # Tick labels show the class name and the percentage back on a linear scale.
  ax.set_yticklabels(['{1}\n{0}'.format('%2.2f' % (10**p), names[idx]) for idx,p in enumerate(percs)])
  
  ax2 = f4.add_subplot('122')
  ax2.set_title('Mean of Nonzero Experiments for Experiment Classes')
  means = array([mean(bp[nonzero(greater(bp,bp_lzero))]) for bp in all_bps])
  # NOTE(review): arange(1,9) again assumes exactly 8 boxes.
  ax2.plot(arange(1,9), means,linewidth = 6)
  ax2.boxplot( [bp[nonzero(greater(bp,bp_lzero))] for bp in all_bps], widths = .5)
  ax2.set_yticks(means)

  names = exps.keys() + ['TF Knockout/OE']
  ax2.set_yticklabels(['{1}\n{0}'.format('%2.2f' % (p), names[idx]) for idx,p in enumerate(means)])