Example #1
0
def get_tf_ss(cluster = 0, trgnames = None, basic = False):
    """Return a list of transcription-factor (TF) keys for a set of targets.

    Two modes are available:

    * ``basic=True``: walk the first 50 targets (in the key order of the
      dict returned by ``nu.parse_net()``) and collect every entry of each
      target's ``'tfs'`` field, without duplicates, in first-seen order.
    * ``basic=False``: binarise ``nu.net_affinity()`` (entries > 0), take the
      rows named by ``trgnames``, average them column-wise and return the TF
      keys whose mean is greater than the module-level ``min_tf_perc``.

    cluster  -- unused; kept so existing callers keep working.
    trgnames -- iterable of target keys present in ``nu.net_trg_keyidxs()``;
                required when ``basic`` is false.
    basic    -- selects the mode described above.

    Raises TypeError when ``basic`` is false and ``trgnames`` is None.
    """
    if basic:
        trgs, _ = nu.parse_net()
        # grab a list of the tfs regulating the first 50 trgs
        tfl = []
        # list(...) keeps the slice valid when keys() returns a view.
        for k in list(trgs.keys())[0:50]:
            for t in trgs[k]['tfs']:
                if t not in tfl:
                    tfl.append(t)
        return tfl

    if trgnames is None:
        # Same exception type the original raised when iterating None,
        # but with a message that says what is wrong.
        raise TypeError('trgnames is required when basic is False')

    min_regs = min_tf_perc
    nat = greater(nu.net_affinity(), 0)
    tgkeys = nu.net_trg_keyidxs()
    tg_sub = nat[[tgkeys[k] for k in trgnames], :]
    mem_means = mean(array(tg_sub, float), 0)

    # Invert the key -> column-index mapping so columns can be named.
    tfkeys = nu.net_tf_keyidxs()
    tfhash = [''] * len(tfkeys)
    for k, v in tfkeys.items():
        tfhash[v] = k

    return [tfhash[n] for n in nonzero(greater(mem_means, min_regs))[0]]
Example #2
0
File: s3.py Project: bh0085/compbio
def all_siblings(name = default_name, reset = 0):
    """Return a boolean matrix marking pairs of rows with similar regulation.

    The affinity matrix from ``nu.net_affinity`` is binarised with the
    module-level ``min_thr``; each row is scaled to unit length (all-zero
    rows are left as zeros) and the row-by-row dot products are compared
    with the module-level ``sib_thr``.

    name  -- key under which the result is read from / written to the
             ``nw`` store.
    reset -- falsy: return the stored result if ``nw.rn2`` reports success,
             otherwise compute it. Truthy: always recompute;
             ``mod(reset, 2)`` is forwarded as the ``reset`` argument of the
             ``nu`` loaders.

    Returns the stored object on a cache hit, else the freshly computed
    boolean array (which is also written back under ``name``).
    """
    donp = True
    hardcopy = True

    if not reset:
        # Use the caller's ``name`` rather than always ``default_name``.
        out, sxs = nw.rn2(name, np = donp, hardcopy = hardcopy)
        if sxs:
            return out

    # parse_net's return value is not needed here; the call is kept because
    # it receives the reset flag.
    nu.parse_net(reset = mod(reset, 2))
    na = nu.net_affinity(reset = mod(reset, 2))

    na_thr = greater(na, min_thr)
    nrms = sqrt(sum(power(na_thr, 2), 1))[:, newaxis]
    # Avoid dividing all-zero rows by zero.
    nrms[equal(nrms, 0)] = 1
    nnn = array(na_thr, float) / nrms
    gg = dot(nnn, nnn.T)

    sibs = array(greater(gg, sib_thr), bool)

    nw.wn2(name, sibs,
           np = donp,
           hardcopy = hardcopy)
    return sibs
Example #3
0
File: s3.py Project: bh0085/compbio
def coreg_keys(t0 = None, do_plot = False):
    """Return the targets that appear co-regulated with target ``t0``.

    Candidates are the keys of the target part of ``trg_markov_blanket(t0)``
    whose weight is greater than .3, excluding ``t0`` itself. For each
    candidate, its affinity row and the row of ``t0`` are binarised at the
    same .3 threshold, scaled to unit length and multiplied together; a
    candidate is kept when that similarity is greater than .4.

    t0      -- key of the reference target (must be known to
               ``nu.net_trg_keyidxs()`` whenever there are candidates).
    do_plot -- when true, call ``show_m`` on the blanket and ``plot_shared``
               on the similarity values.

    Returns a list of target keys.
    """
    # Return value unused here; the call is kept in case it primes state
    # used by the other nu loaders -- TODO confirm.
    nu.parse_net()
    btgs, btfs = trg_markov_blanket(t0)
    if do_plot:
        show_m(btgs, btfs, t0)

    min_wt = .3
    # Materialise keys/values so indexing works when they are dict views.
    blanket_keys = list(btgs.keys())
    blanket_wts = list(btgs.values())
    tgs_thr = nonzero(greater(blanket_wts, min_wt))[0]
    # Drop t0 without raising if it did not pass the threshold.
    keys_thr = [blanket_keys[i] for i in tgs_thr if blanket_keys[i] != t0]

    na = nu.net_affinity()
    ktg = nu.net_trg_keyidxs()

    threshold_sims = True

    def _unit(row):
        # Always work on a fresh float array so that normalising can never
        # write back into the affinity matrix.
        if threshold_sims:
            row = array(greater(row, min_wt), float)
        else:
            row = array(row, float)
        length = sqrt(sum(power(row, 2)))
        if length != 0:
            row = row / length
        return row

    shared = []
    if keys_thr:
        # The reference row does not change between candidates.
        row0 = _unit(na[ktg[t0]])
        shared = [sum(row0 * _unit(na[ktg[k]])) for k in keys_thr]
    shared = array(shared)

    min_sharing = .4
    selected = [keys_thr[i] for i in nonzero(greater(shared, min_sharing))[0]]

    if do_plot:
        plot_shared(shared)

    return selected
Example #4
0
def view_in():
    """Draw the axis-0 sums of the network affinity, sorted ascending.

    The values are passed as a single row to ``sm.seismic`` on a fresh
    subplot of figure 0, which is titled 'In degree, sorted'.
    """
    affinity = nu.net_affinity()

    fig = plt.figure(0)
    fig.clear()
    axes = fig.add_subplot(111)

    # Sum along axis 0 and reorder from smallest to largest.
    in_degree = sum(affinity, 0)
    order = argsort(in_degree)
    sorted_degree = in_degree[order]

    sm.seismic([sorted_degree], ax=axes)

    myplots.maketitle(axes, 'In degree, sorted')
Example #5
0
def draw_hclustered(clustered):
    """Draw a hierarchical clustering as an image, with a TF overlay.

    ``clustered['HCluster']`` is cut at 100 thresholds evenly spaced on
    [0, .9]. For every threshold a row is built that stores, per item, the
    item index at which its cluster label was first encountered; the row is
    sorted, divided by its maximum and the stack of rows is shown on
    figure 0. A second image marks the positions whose pre-sort index equals
    the square-affinity index of a TF (only indices below the item count).
    """
    hc = clustered['HCluster']
    clusters = hc.cut(0)

    fig = plt.figure(0)
    fig.clear()
    ax = fig.add_axes([0, 0, 1, 1], aspect='auto')
    ax.set_aspect('auto')

    # Number of items = largest label of the zero cut, plus one.
    base_labels = hc.cut(0)
    n = max(array(base_labels)) + 1
    nlvl = 100
    h = zeros((nlvl, n))

    cuts = linspace(0, .9, nlvl)
    # first_seen is shared by all levels: once a label has been met at some
    # level its recorded position is reused at the following ones.
    first_seen = zeros(n) - 1
    hinds = zeros((nlvl, n), int)
    for lvl, cut in enumerate(cuts):
        clusters = hc.cut(cut)
        for j in range(n):
            label = clusters[j]
            if first_seen[label] == -1:
                first_seen[label] = j
            h[lvl, j] = first_seen[label]
        # Remember the permutation, then store the row in sorted order.
        hinds[lvl, :] = argsort(h[lvl, :])
        h[lvl, :] = h[lvl, hinds[lvl, :]]

    # Rows of h are views, so the in-place division rescales h itself.
    for row in h:
        row /= max(row)

    ax.imshow(mycolors.imgcolor(h, BW = True), aspect = 'auto', interpolation = 'nearest')
    saff, ks = nu.net_square_affinity()
    raff, kt, ktf = nu.net_affinity()

    # Position of every TF key inside the square-affinity key list.
    tfidxs = array([ks.index(tf) for tf in ktf.keys()])

    in_range = nonzero(less(tfidxs, n))[0]
    is_clustered = tfidxs[in_range]
    tf_alpha = zeros((nlvl, n))
    for tf_idx in is_clustered:
        tf_alpha += equal(hinds, tf_idx)

    tf_rgba = mycolors.imgcolor(tf_alpha, alpha = True, color = [1,0,0])

    ax.imshow(tf_rgba, aspect = 'auto', interpolation = 'nearest')
Example #6
0
def net_cluster_genes_by_tf(k= k, reset = 0):
    """Return a k-means clustering of the first ``maxgenes`` affinity rows.

    k     -- number of clusters handed to ``mlpy.Kmeans``.
    reset -- falsy: load the stored result for the module-level ``name``
             via ``nw.rn2``. Truthy: recompute and store it;
             ``mod(reset, 2)`` is forwarded to ``nu.net_affinity``.

    Returns the stored object when loading, or ``(clustered, means)`` when
    recomputing.

    Raises Exception when ``reset`` is falsy and nothing could be loaded.
    The function never computes implicitly, matching the original intent of
    re-raising everything except the explicit 'compute' request.
    """
    hardcopy = True

    if not reset:
        out, sxs = nw.rn2(name, hardcopy = hardcopy)
        if not sxs:
            # The original raised a bare Exception() here and then indexed
            # e.args[0] in its handler, which surfaced as an IndexError.
            raise Exception('no stored result for %r; call with reset to '
                            'compute it' % (name,))
        return out

    nw.claim_reset()
    gg = nu.net_affinity(reset = mod(reset, 2))
    kmeans = mlpy.Kmeans(k)
    clustered = kmeans.compute(gg[0:maxgenes, :])
    means = kmeans.means
    out = (clustered, means)
    nw.wn2(name, out, hardcopy = hardcopy)
    return out
Example #7
0
def net_svd(name=nu.default_name, reset=0):
    """Load, or compute and store, a singular value decomposition.

    name  -- key used for both reading and writing through ``nw``.
    reset -- falsy: return ``nw.readnet(name=name, ...)``; any error from
             the read propagates unchanged. Truthy: recompute;
             ``mod(reset, 2)`` is forwarded to ``nu.net_affinity``.

    When recomputing, returns ``(a[1], (U, S, V))`` where ``U`` is truncated
    to its first ``len(S)`` columns and ``V`` is the transpose of the third
    factor returned by ``lin.svd``.
    """
    hardcopy = True

    if not reset:
        # Read from the same key that the compute path writes to.
        return nw.readnet(name=name, hardcopy=hardcopy)

    nw.claim_reset()
    # NOTE(review): the first element of net_affinity()'s result is indexed
    # again below (a[0] as the matrix, a[1] as the saved companion).
    # Confirm this matches the loader's return shape.
    a = nu.net_affinity(reset=mod(reset, 2))[0]
    N = a[0]
    U, S, Vh = lin.svd(N)
    V = Vh.T
    # Keep the columns of U that pair with a singular value; the original
    # ``U[:, len(S)]`` picked one column just past them (or raised
    # IndexError when U had exactly len(S) columns).
    U = U[:, :len(S)]
    dosave = (a[1], (U, S, V))
    nw.writenet(name, dosave, hardcopy=hardcopy)
    return dosave
Example #8
0
def cluster( columns = False):
    """Run a 20-cluster k-means on the affinity matrix and display it.

    columns -- when true, the transpose of the matrix is clustered instead
               of the matrix itself.

    The means and the value returned by ``Kmeans.compute`` are handed to
    ``clusterview.specview``; nothing is returned.
    """
    sqa = nu.net_affinity()[0]

    # array(...) copies, so the data handed to k-means is detached from sqa.
    data = array(sqa.T) if columns else array(sqa)

    n_clusters = 20
    km = mlpy.Kmeans(n_clusters)
    labels = km.compute(data[:, :])
    centers = km.means

    import compbio.learning.clusterview as cl
    cl.specview(centers, labels)
Example #9
0
def viewmany(all_means, all_clusters, fig = 12):
    """Project the cluster means of several clusterings onto TFs and plot.

    all_means    -- sequence of 2-D arrays, one per clustering, each holding
                    one mean per row. All are assumed to have the same
                    number of rows (taken from the first entry). The column
                    count decides how a mean is mapped to TFs:
                    more than 8000 columns -> multiplied against the TF
                    rows of the transposed square affinity; more than 500
                    -> TF columns are read off directly.
    all_clusters -- unused by this function.
    fig          -- matplotlib figure number to draw on.

    For every clustering the 20 highest-scoring TFs (after giving each mean
    equal weight) are flagged; the union of flagged TFs is what gets
    plotted, one line per clustering.

    Returns the ``(n_clusterings, k, n_tfs)`` array of absolute projections.

    Raises ValueError when a means array has 500 columns or fewer, since
    neither mapping applies.
    """
    n = len(all_means)
    f = plt.figure(fig)
    f.clear()
    print('''Running viewmany.py

For now, viewmany assumes that k is equal across clustering instances
this is not really important but has to do with how TF projections are
stored.
''')
    #1 k.
    k = len(all_means[0])

    ax1 = f.add_axes([.05,.05,.95,.4])
    # Second axes is added to the figure but not drawn on here.
    f.add_axes([.05,.55,.95,.4])

    sqa = nu.net_square_affinity()[0]

    #tf_sqidxs should have length = ntf
    #with each element giving the coordinate of the
    #i'th tf in sqa space.
    sqidxs = nu.net_sq_keyidxs()
    n_tfidxs = nu.net_tf_keyidxs()
    _trgs, tfs = nu.parse_net()
    tf_sqidxs = [sqidxs[key] for key in tfs.keys()]
    # list(...) so the values can be used as an index when values() is a view.
    # NOTE(review): tf_sqidxs follows the key order of ``tfs`` while tfidxs
    # follows the value order of ``n_tfidxs``; confirm both orders agree.
    tfidxs = list(n_tfidxs.values())
    ntf = len(tfidxs)

    tfweights = zeros(ntf,int)
    #find tfs of general interest, choosing at most ntf_each for each clustering
    ntf_each = 20

    print('''...Computing representative TFs for each clustering.

In the current formulation, we project each mean on to associated tf
and then normalize each projection so that each mean has equal weight
in TF selection.

Not that we have handled the case where we have clusted in TF space
explicitly (e.g, dim = 541) and where we are in gene space explicitly,
(e.g., dim = 8321, GG matrix or svdU). svdV is emphatically not handled.
Neither would svdU of TF-TF which is actually the the exact same thing.''')

    TFprojs= zeros((n,k,ntf))
    for i in range(n):
        m = all_means[i]
        dim = shape(m)[1]
        #we are now going to project clusters on to the tfs
        #in this form, we only need rows corresponding to tfs.

        # The larger threshold must be tested first: every dim > 8000 is
        # also > 500, so the original ordering never reached this branch.
        if dim > 8000:
            #Gene space (GG or SVD_U): undo the mapping with sqaT.
            #remember, ROWS of the matrix correspond to the
            #target space.
            a = sqa.T[tf_sqidxs,:]
            this_tf_sum = np.abs(np.sum(a[newaxis,:,:]*m[:,newaxis,:],2))
        elif dim > 500:
            #TF space (e.g. dim = 541): just read off the tf columns.
            this_tf_sum = np.abs(m[:,tfidxs])
        else:
            # Previously this fell through and reused a stale (or unbound)
            # this_tf_sum.
            raise ValueError('viewmany cannot map means with %d columns '
                             'to TFs' % (dim,))

        TFprojs[i,:,:] = this_tf_sum
        #normalize so that each mean has the same weight
        this_tf_sum = this_tf_sum / np.sum(this_tf_sum,1)[:,newaxis]
        #sum over cluster means to find the most important tfs
        this_tf_sum = np.sum(this_tf_sum,0)

        best = argsort(this_tf_sum)[::-1]
        tfweights[best[0:ntf_each]]=1
    print('''Finished computing representative TFs
''')

    tfs_of_interest = nonzero(tfweights)[0]
    n_interest = len(tfs_of_interest)
    avg_shared = 1. - float(n_interest)/(n * ntf_each)
    # The original message had mismatched quotes, so the text
    # "'+str(ntf_each) + 'tfs" was printed literally.
    print('''Allowing for each cluster to choose ''' + str(ntf_each) + ''' tfs,
we got ''' + str(n_interest) + ''' tfs of interest.
or a mean sharing ratio of ''' + str(round(avg_shared,3))+ '''.''')

    #get a color table for clusters.
    ct = mycolors.getct(n)

    xax = linspace(0,1,n_interest)
    for i in range(n):
        # Mixing the scalar index with the index array puts the TF axis
        # first, so the reductions along axis 1 run over the k means.
        proj = TFprojs[i,:,tfs_of_interest]
        ax1.plot(xax,np.sum(proj,1)/np.max(proj,1),color = ct[i])

    return TFprojs