Example #1
def show_3d(fbid, scatter = False,
            step = 0, skip = 10,ofs = [0,0,0], ax = None, **kwargs):
  '''Plot the expression of a given mrna in 3d.

  If you pass it an axes, you get the same axes back and can call the
  function again to plot other genes on it.

  fbid:     key of the gene in the dict returned by nio.getBDTNP()
  scatter:  if True draw a scatterplot whose marker sizes follow the
            expression level, otherwise draw a line plot
  step:     column of the 'vals' arrays to plot (the timestep)
  skip:     stride over the rows of the 'vals' arrays (the nuclei)
  ofs:      length-3 x,y,z offset added to every plotted position;
            it is only read, never modified
  ax:       the plotting ax; a new 3d ax is created when it is None
  **kwargs: keywords forwarded to ax.scatter / ax.plot

  Returns the ax that was drawn on.
  Raises AssertionError if there is no BDTNP data for fbid.
  '''
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)
  assert fbid in mrnas.keys(), 'No BDTNP data for {0}'.format(fbid)

  # ofs may be a plain list (the default) or an array. Convert it before
  # adding the new axis so that it broadcasts against the (3, n) positions.
  positions = array([misc[k]['vals'][::skip,step]
                     for k in ('x','y','z')])
  xs, ys, zs = positions + array(ofs)[:,newaxis]

  # Marker sizes: expression shifted to start at 0 and rescaled to peak
  # at 10. Float dtype keeps the in-place division valid for integer data.
  sizes = array(mrnas[fbid]['vals'][::skip,step], dtype = float)
  sizes -= min(sizes)
  sizes /= max(sizes)/10. if max(sizes) > 0 else 1

  if ax is None:
    f = plt.figure()
    ax = f.add_subplot(111, projection = '3d')

  if scatter:
    ax.scatter(xs, ys, zs, s = sizes, **kwargs)
  else:
    ax.plot(xs, ys, zs, **kwargs)
  return ax
Example #2
def getClusteringInputs( ncluster =1000, host = 'tin', 
                           reset = 0, step = 10, exemp_time = 'all',
                           doplot = False):
  '''Build the list of input dicts for a clustering run.

  Genes are kept when they rank in the top 20 both by the minimum and by
  the median (over the last axis) of their variance along axis 1 of the
  stacked 'vals' arrays. Every (row, column) entry of the 'vals' arrays
  then becomes one data point described by the values of those genes.
  (Assumes each 'vals' array is (n_cells, n_times) -- TODO confirm.)

  ncluster:   number of data points (taken from the front of the
              candidate list) used to build the similarity matrix
  exemp_time: 'all' to draw candidates from every column, or a column
              index to draw them from that column only
  host, reset, step, doplot: not used by this function

  Returns a list of 8 dicts with keys 'similarities', 'self_similarity'
  and 'metric'. They share the same similarity matrix and differ in the
  percentile of it (logspace(.1, 1.5, 8)) used as self similarity.
  '''
  mrnas = nio.getBDTNP()
  vals = array([v['vals'] for v in mrnas.values()])
  gvars = var(vals, 1)
  gminvars = np.min(gvars,1)
  gmedvars = median(gvars,1)

  min20 = argsort(gminvars)[::-1][:20]
  med20 = argsort(gmedvars)[::-1][:20]
  xgenes = array(list(set(min20).intersection(set(med20))))

  cell_data = vals[xgenes].transpose(1,2,0)
  scd = shape(cell_data)
  npoints = prod(scd[0:2])

  # xycoords[i, j] == [i, j]: the (row, column) index of every data point,
  # so that column 1 of xy_data can be compared against exemp_time.
  xycoords = (arange(scd[0])[:,newaxis,newaxis]*[1,0] +
              arange(scd[1])[newaxis,:,newaxis]*[0,1])
  cell_data = reshape(cell_data, (npoints, scd[2]))
  xy_data = reshape(xycoords, (npoints, 2))

  if exemp_time == 'all':
    inds = arange(len(cell_data))
  else:
    inds = nonzero(equal(xy_data[:,1], exemp_time))[0]

  sim_data = cell_data[inds[0:ncluster]]
  t = [ mean(sim_data, 0), std(sim_data,0)]
  # NOTE(review): as written this assignment is a no-op (zeros are set to
  # zero). A guard against zero std would presumably assign 1 -- confirm
  # against what similarity() does with the transform.
  t[1][equal(t[1],0)] = 0
  metric = 'neg_dist'
  sims = similarity(sim_data, transform = t, method = metric)

  return [dict(similarities = sims,
               self_similarity = ss.scoreatpercentile(sims, p),
               metric = metric)
          for p in logspace(.1,1.5,8)]
Example #3
def p_m_correlation():
  '''Plot protein against mRNA levels for every gene that has both.

  One subplot is made per gene found in both the protein and the mRNA
  BDTNP dicts, laid out on a square grid with shared axes. Each subplot
  is annotated with the gene symbol and with the correlation coefficient
  between all protein and all mRNA values of that gene.

  NOTE(review): this copy of the function is truncated. The head of the
  call inside the final loop and the head of the closing call that ends
  in format = 'tiff' are both missing, so the block does not parse as is.
  '''
  prots = nio.getBDTNP(protein = True)
  mrnas = nio.getBDTNP()
  # Only genes present in both datasets can be compared.
  matched = set(mrnas.keys()).intersection(set(prots.keys()))
  pairs = [(prots[k] , mrnas[k], k) for k in matched]

  f = plt.figure(0)
  f.suptitle('mRNA and Protein Levels from BDTNP at six times in ~6000 cells', fontsize = 22)
  # Smallest square grid that holds one subplot per pair.
  nx = ny = ceil(sqrt(len(pairs)))
  shp = shape(mrnas.values()[0]['vals'])
  # One color per column of 'vals' (presumably one per timepoint -- confirm).
  colors = mycolors.getct(shp[1])
  shr = None
  for i, p in enumerate(pairs):
    # NOTE(review): the subplot position is spelled as a digit string, which
    # is only unambiguous while nx, ny and i+1 are all single digits.
    ax = f.add_subplot('{0:g}{1:g}{2:g}'.format(nx, ny , i+1),
                       sharex = shr,sharey = shr)
    # Every later subplot shares its axes with the first one.
    if not shr: shr = ax
    fbid = p[-1]
    #    fbid, nu.gene_symbol(fbid), tw.fill(nu.gene_biology(fbid), 75)))
    ax.grid(True, alpha = .2)
    ax.annotate(nu.gene_symbol(fbid),xy = [.02,.98], 
                xycoords = 'axes fraction', size = 25, va = 'top')
    # mu[0,1] is the protein/mRNA entry of the 2x2 correlation matrix.
    mu = corrcoef(p[0]['vals'][::,:].flatten(),p[1]['vals'][::,:].flatten())
    ax.annotate('$\mu = {0:.2g}$'.format(mu[0,1]),xy = [.98,.98],
                xycoords = 'axes fraction', size = 25,ha = 'right', va = 'top')

    # Only the first column keeps its y label; the others hide y ticks.
    if mod(i, nx) >0: 
      plt.setp( ax.get_yticklabels(), visible=False)
    else:  ax.set_ylabel('mrna expression level')
      #plt.setp( ax.get_ylabel(), visible=False)
    # Only the last row keeps its x label; the others hide x ticks.
    if floor(i/nx) < (ny -1) : 
      plt.setp( ax.get_xticklabels(), visible=False)
    else:  ax.set_xlabel('protein expression level')

      #plt.setp( ax.get_xlabel(), visible=False)

    # NOTE(review): the loop body below has lost its first line; the keyword
    # arguments look like the tail of a per-column scatter call -- confirm.
    for j in range(shp[1]):
                 s = 20,alpha = .2,color = colors[j])
    # NOTE(review): tail of a truncated call, presumably saving the figure
    # as a tiff; the target path is not recoverable from this copy.
                            ),format = 'tiff')
Example #4
def show_multi(timepoint = -1):
  '''Overlay the most variable mRNAs in a single 3d plot.

  Genes are ranked by the standard deviation of column `timepoint` of
  their 'vals' array. The (at most) 10 highest ranking ones are drawn
  with show_3d on one shared 3d ax, each in its own color, with a random
  x,y,z offset, and at the column where its summed expression peaks.

  timepoint: column of the 'vals' arrays used for the ranking

  NOTE(review): the head of the plotting call was missing from this copy
  of the function. It is restored as show_3d(fbid, ...) because the
  surviving keyword arguments (step, skip, ax, ofs, color) match that
  function -- confirm against the original source.
  '''
  mrnas = nio.getBDTNP()
  # Freeze one (key, record) ordering so ranks map back to gene keys; this
  # also avoids indexing .values(), which only works on Python 2.
  genes = list(mrnas.items())

  #choose to look only at one timepoint
  stds = [std(m['vals'][:,timepoint]) for _, m in genes]
  f = plt.figure(0)
  # Clearing is best effort: a failure here is reported and then ignored.
  try:
    f.clear()
  except Exception:
    print('hi')
  ax = f.add_subplot(111, projection = '3d')
  vsort = argsort(stds)[::-1]
  # Never ask for more genes than there are.
  n = min(10, len(genes))
  colors = mycolors.getct(n)
  for i in arange(n):
    fbid, record = genes[vsort[i]]
    # Plot each gene at the column where its total expression is highest.
    step = argmax(np.sum(record['vals'],0))
    show_3d(fbid,
            step = step, skip = 20, ax = ax, ofs =10*random.rand(3),
            color = colors[i])
Example #5
def cluster_tissues(nx = 20,ny = 500, timepoint = -1,
                    step = 4,
                    sim = 'neg_dist', 
                    imshow_sims = False,
                    scatter_sims = False,
                    hist_sims = False,
                    do_cluster= True,
                    do_show = True, cstep = -1):
  '''Cluster ny nuclei by the values of the nx mRNAs with highest
  variance. Uses the medioids method with number of clusters set
  by exemplar self similarity as outlined in 6.874 and implemented
  at http://www.psi.toronto.edu/affinitypropagation 

  nx:           number of genes to cluster upon
  ny:           number of cells in the clustering
  timepoint:    which time to use for cluster computation
  step:         stride over the nuclei when showing results
  sim:          similarity method handed to similarity(); the default
                'neg_dist' is distance based
  imshow_sims:  show the training similarity matrix as an image
  scatter_sims: scatter the rows of the training similarity matrix
  hist_sims:    compute a histogram of the log-scaled similarities
  do_show:      draw the cluster assignments and save them as a tiff
  do_cluster, cstep: not used by this function

  The exemplar indices are read back from the file
  'bdtnp/clustering/nuclei/idxs' under cfg.dataPath after cluster() ran.
  Returns None.
  '''
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)
  # list() so that genes can be indexed by position on Python 2 and 3.
  gene_vals = [m['vals'] for m in mrnas.values()]

  #choose to look only at one timepoint
  stds = [std(v[:,timepoint]) for v in gene_vals]
  vsort = argsort(stds)[::-1]
  xinds = vsort[:nx]

  #Choose the most variable factors and use them as the 
  #underlying variables from which to construct a similarity
  nuclei = array([gene_vals[idx][:,timepoint] for idx in xinds]).T

  t = [ mean(nuclei, 0), std(nuclei,0)]
  # NOTE(review): as written this assignment is a no-op (zeros are set to
  # zero). A guard against zero std would presumably assign 1 -- confirm.
  t[1][equal(t[1],0)] = 0
  sims = similarity(nuclei, transform = t, method = sim)
  # ny nuclei evenly spaced over the whole list form the training set.
  cluster_inds = array(floor(linspace(0,len(nuclei)-1, ny)), int)  
  cluster_training = sims[cluster_inds,:][:,cluster_inds]

  f = plt.figure(0)

  # The three diagnostic plots used an undefined name (cluster_sims). The
  # only ny x ny similarity matrix in scope is cluster_training.
  if scatter_sims:
    ax = f.add_subplot(111)
    scatterx = [cluster_training[i] for i in range(ny) for j in range(ny)]
    scattery = [cluster_training[j] for i in range(ny) for j in range(ny)]
    ax.scatter(scatterx, scattery, s =3, alpha = .1)

  if imshow_sims:
    ax = f.add_subplot(111)
    cmap = mycolors.blackbody()
    ax.imshow(cluster_training, cmap = cmap, interpolation = 'nearest')

  if hist_sims:
    ax = f.add_subplot(111)
    # flatten() copies, so the training matrix itself is left untouched.
    csf = cluster_training.flatten()
    csf -= max(csf)
    csf *= -1
    # NOTE(review): the histogram is computed but never drawn.
    h = histogram(log10(1+csf), bins = 100)

  # NOTE(review): do_cluster is ignored; the clustering always runs.
  cluster(cluster_training, ss.scoreatpercentile(cluster_training,.2) )
  with open(cfg.dataPath('bdtnp/clustering/nuclei/idxs')) as fopen:
    c = [int(l.strip()) for l in fopen]
  c_training_exemplars = set(c)
  exemplar_inds = [cluster_inds[i] for i in c_training_exemplars]
  #I am being a bit lazy with subscripting here because I just assume
  #that the similarity is symmetric... I suppose I could let it be 
  #asymmetric if I liked

  exemplars = nuclei[exemplar_inds,:]
  all_sims = similarity(nuclei,  exemplars,
                        transform = t, 
                        transform_exemplars = True,
                        method = sim)
  assignments = np.argmax(all_sims,1)

  colors = array(mycolors.getct(len(c)))

  if do_show:
    ntimes = shape(gene_vals[0])[1]
    # The [-1:] slice restricts the "movie" to its last frame.
    for tp in range(ntimes)[-1:]:
      # Clearing is best effort: a failure is reported and then ignored.
      try:
        f.clear()
      except Exception:
        print('Weird 3d plotting error. Alas')
      # Reassign every nucleus to its most similar exemplar at time tp.
      tp_nuclei = array([gene_vals[idx][:,tp] for idx in xinds]).T
      all_sims = similarity(tp_nuclei,  exemplars,
                            transform = t, transform_exemplars = True,
                            method = sim)
      assignments = np.argmax(all_sims,1)

      ax = f.add_subplot(111)
      xs = misc['x']['vals'][::step,0]
      zs = misc['z']['vals'][::step,0]
      ax.scatter(xs, zs,s= 50, color =colors[assignments[::step]])
      #ax.set_title('''Virtual embryo cell (n={2}) clusters 
#from similarities derived from {0} genes. 
#Clusters derived at T = {1}, shown at T = {3}.'''\
#                     .format(nx,timepoint, len(xs),tp))
      f.savefig(cfg.dataPath('figs/bdtnp/cluster_movie{0:02d}.tiff'.format(tp)), format = 'tiff')
Example #6
def cluster_exprs(all_members, ct_data,
                  do_plot = False,
                  cluster_type = '4d',
                  cluster_id = 4):
  '''Split the data points of one clustering into tissues and plot them.

  all_members: sequence of clusterings; all_members[cluster_id] holds one
               cluster label per row of ct_data
  ct_data:     array with one row per data point. Column 1 is compared
               against the times 0..5, and each row is used as a
               (row, column) index into the 'vals' arrays (presumably
               (cell index, time index) -- confirm against the caller)
  do_plot:     also draw the per-tissue, per-time counts with
               seismic.seismic
  cluster_type: not used by this function
  cluster_id:  which entry of all_members to use

  Every distinct label becomes a tissue 't_<i>'. For each tissue and each
  time present in it, the x-z and y-z positions of its points are
  scattered on figure 1 and the expression of every mRNA at those points
  is stored under the tissue's 'exprs' key (the last time processed wins).

  NOTE(review): the function ends in an unconditional raise Exception(),
  and all_exprs is never filled, so exprs_out is always empty. Both are
  kept as found; this looks like an unfinished function or debugging stop.
  '''
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)

  c = all_members[cluster_id]
  c_unq = set(list(c))

  tissues = dict([('t_{0}'.format(i) , dict(cts = ct_data[equal(c,elt)]))
                  for i, elt in enumerate(c_unq)])
  nt = 6
  # counts[tissue, time]: number of points of the tissue at each time.
  counts = array([[sum(equal(tissue['cts'][:,1],tp))
                   for tp in range(nt) ] 
                  for tissue in tissues.values() ])

  if do_plot:
    f = plt.figure(1)
    ax1 = f.add_subplot(121)
    ax2 = f.add_subplot(122)
    seismic.seismic(counts , ax = ax1,stacked = True,colors = mycolors.getct(len(counts)))
    #seismic.seismic(np.sort(counts,0) , ax = ax2,stacked = False,colors = mycolors.getct(len(counts)))
  all_exprs = {}
  for tname, tissue in tissues.items():
    ct_all = tissue['cts']
    for tp in set([row[1] for row in ct_all]):
      ct = [row for row in ct_all if row[1] == tp]
      # A tuple of index sequences selects vals[rows, columns] pointwise.
      # A bare list (or a Python 3 zip object) is not a valid index.
      idx = tuple(zip(*ct))

      exprs = dict([(k, elt['vals'][idx]) for k, elt in mrnas.items()])
      ys = misc['y']['vals'][idx]
      zs = misc['z']['vals'][idx]
      xs = misc['x']['vals'][idx]

      f = plt.figure(1)
      # NOTE(review): the tails of the two title expressions were missing
      # from this copy; .format(<tissue name>) is restored -- confirm.
      ax1 = f.add_subplot(121, title = 'X-Z axis view for tissue {0}'.\
                          format(tname))
      ax2 = f.add_subplot(122,title = 'Y-Z axis view for tissue {0}'.\
                          format(tname))
      ax1.scatter(xs, zs)
      ax2.scatter(ys, zs)
      tissue['exprs'] = exprs
  exprs_out = dict([( k, [ mean(sub[k]) for sub in all_exprs[k].values() ]) 
                    for k in all_exprs.keys() ])

  raise Exception()
Example #7
def c2( launcher = None, ncluster =2000, host = 'tin', 
        reset = 0, step = 10, exemp_time = 'all',
        doplot = False ,**kwargs):
  '''Two stage driver for a remote clustering run.

  Called without a launcher, it builds the clustering inputs, wraps them
  in a bcl.launcher (cached through mem.getOrSet) and returns that
  launcher. Called with a launcher, it fetches the launcher's output
  (also cached), optionally plots the result, and returns
  (all_members, ct_data).

  launcher:   None for the first stage, the launcher returned by the
              first stage for the second
  ncluster:   number of data points used to build the similarity matrix
  host:       host handed to bcl.launcher
  reset:      reset flag handed to mem.rc
  step:       stride over the points when plotting
  exemp_time: 'all' to draw candidates from every column of the 'vals'
              arrays, or a column index to draw from that column only
  doplot:     draw one panel per (clustering, time) on figure 0
  **kwargs:   not used

  ct_data holds the (row, column) index into the 'vals' arrays of every
  data point (presumably (cell index, time index) -- confirm).

  NOTE(review): all_members is returned empty because nothing is ever
  appended to it, and the exemplars computed in the final loop are never
  passed to similarity(). Both look like lines lost from this copy of the
  function and are left as found rather than guessed at.
  '''
  mrnas = nio.getBDTNP()
  misc = nio.getBDTNP(misc = True)
  vals = array([v['vals'] for v in mrnas.values()])
  gvars = var(vals, 1)
  gminvars = np.min(gvars,1)
  gmedvars = median(gvars,1)

  # Keep genes ranking in the top 20 by both minimum and median variance.
  min20 = argsort(gminvars)[::-1][:20]
  med20 = argsort(gmedvars)[::-1][:20]
  xgenes = array(list(set(min20).intersection(set(med20))))

  cell_data = vals[xgenes].transpose(1,2,0)
  scd = shape(cell_data)
  npoints = prod(scd[0:2])

  # xycoords[i, j] == [i, j]: the (row, column) index of every data point,
  # so that column 1 of xy_data can be compared against a time index.
  xycoords = (arange(scd[0])[:,newaxis,newaxis]*[1,0] +
              arange(scd[1])[newaxis,:,newaxis]*[0,1])
  cell_data = reshape(cell_data, (npoints, scd[2]))
  xy_data = reshape(xycoords, (npoints, 2))

  if exemp_time == 'all':
    inds = arange(len(cell_data))
  else:
    inds = nonzero(equal(xy_data[:,1], exemp_time))[0]

  sim_data = cell_data[inds[0:ncluster]]
  t = [ mean(sim_data, 0), std(sim_data,0)]
  # NOTE(review): as written this assignment is a no-op (zeros are set to
  # zero). A guard against zero std would presumably assign 1 -- confirm.
  t[1][equal(t[1],0)] = 0
  metric = 'neg_dist'
  sims = similarity(sim_data, transform = t, method = metric)

  name = 'll_{0}_{1}_{2}'.format(metric,ncluster,exemp_time)

  def setLauncher(**kwargs):
    # Build one input dict per self-similarity percentile and launch them.
    job_sims = kwargs.get('sims')
    job_metric = kwargs.get('metric')
    job_name = kwargs.get('name')
    d_in = [dict(similarities = job_sims,
                 self_similarity = ss.scoreatpercentile(job_sims, p),
                 metric = job_metric)
            for p in logspace(.1,1.5,8)]
    return bcl.launcher(d_in, host = host, name = job_name)

  if launcher is None:
    output = mem.getOrSet(setLauncher,
                          **mem.rc(dict(sims = sims, metric = metric,
                                        name = name,
                                        hardcopy = True,
                                        reset = reset,
                                        hard_reset = False,)))  
    return output

  def setC2(launcher = launcher, **kwargs):
    # Fetch the finished run; there is nothing to fetch without a launcher.
    if launcher is None:
      raise Exception()
    return launcher.output()
    #It appears that the bsub process failed for the first output.
    #No big deal. Debug later.

  # 'hardcopy' was misspelled 'harcopy' here; spelled as in the call above.
  output = mem.getOrSet(setC2,
                        **mem.rc(dict(hardcopy = True,
                                      launcher = launcher,
                                      reset = reset,
                                      on_fail = 'compute',
                                      hard_reset = False,
                                      name =  'c2'+ name )))
  all_inds = array([  squeeze(o['inds']) for o in output[:] ])

  # A tuple of index arrays selects vals[rows, columns] pointwise.
  point_idx = tuple(xy_data.T)
  xs = misc['x']['vals'][point_idx]
  ys = misc['y']['vals'][point_idx]
  zs = misc['z']['vals'][point_idx]
  colors =array( mycolors.getct(shape(all_inds)[1]) )
  f = plt.figure(0)
  all_tps = range(scd[1])
  nc = len(all_inds)
  nt = len(all_tps)

  all_members = []
  for i, inds in enumerate(all_inds):
    #compute similarity matrices 1000 at a time:
    # NOTE(review): exemplars is not used by the call below -- see docstring.
    exemplars = sim_data[list(set(list(inds)))]
    sim = similarity(cell_data, 
                   transform = t,
                   method = metric)
    closest = argmax(sim, 1)
    if doplot:
      for j, tp in enumerate(all_tps):
        # One panel per (clustering i, time j), tiled over the figure.
        ax = f.add_axes( [float(j)/nt,float(i) /nc,1./nt, 1. /nc] )
        # Points of time j with positive y only.
        i_sub = nonzero(equal(xy_data[:,1], j) * greater(ys,0))[0]
        cs = colors[closest[i_sub]]
        x = xs[i_sub]
        z = zs[i_sub]
        plt.scatter(x[::step],z[::step], 40,alpha = .75, c = cs[::step], edgecolor = 'none')
  ct_data = xy_data
  return all_members, ct_data