def all_corr(vec):
    """Z-score the columns of *vec* and correlate its rows pairwise.

    Args:
        vec: Rectangular 2-D numeric data given as a sequence of
            equal-length rows. It is converted with ``np.array`` before
            any computation.

    Returns:
        A tuple ``(norm_t, cor_mat, distance)``:

        * ``norm_t`` -- ``.T`` of whatever ``analyse.data2arr`` builds from
          the per-column ``ss.zscore`` results (presumably an array with
          the same orientation as *vec*; confirm against
          ``analyse.data2arr``).
        * ``cor_mat`` -- square array holding the ``ss.pearsonr``
          coefficient for every ordered pair of rows of the raw
          (not z-scored) *vec*.
        * ``distance`` -- 1-D array of ``1 - cor_mat`` restricted to the
          strict upper triangle, concatenated row by row.

    The row/column counts and the number of distances are printed to
    stdout as progress output.
    """
    ln = len(vec[0])
    print('%s %s' % (ln, ln))
    print('%s %s' % (len(vec), len(vec[0])))
    vec = np.array(vec)
    n_rows = len(vec)

    # One z-scored vector per column of vec.
    norm = analyse.data2arr([ss.zscore(vec[:, col]) for col in range(ln)])

    # NOTE(review): the progress message says "spearman", but the
    # coefficients below come from ss.pearsonr on the raw rows (an earlier
    # ss.spearmanr call on the z-scored data was disabled). The message
    # text is left unchanged.
    print('spearman ...')
    cor_mat = np.array([
        [ss.pearsonr(vec[a], vec[b])[0] for b in range(n_rows)]
        for a in range(n_rows)
    ])

    dist = 1 - cor_mat
    # Collect the entries above the diagonal, row by row. extend() appends
    # in place instead of re-copying the accumulated list for every row.
    distance = []
    for i, row in enumerate(dist):
        distance.extend(row[i + 1:])
    print(len(distance))
    distance = np.array(distance)
    return norm.T, cor_mat, distance
def CPG_RPKM(RPKM, CPG, lim):
    """Average above-threshold values per row, overall and for listed genes.

    Args:
        RPKM: Table as a sequence of rows. The first row holds the gene
            names from index 1 onwards; every following row is a label
            followed by one value per gene.
        CPG: Passed to ``read.read_dat(CPG, '\\t')`` (presumably the path of
            a tab-delimited file -- confirm against ``read.read_dat``). The
            first field of each record it yields is used as a gene name.
        lim: Threshold; only values strictly greater than ``lim`` are used.

    Returns:
        A tuple ``(allave, cgiave, libs)``:

        * ``allave`` -- array with, per data row, the mean of the values
          above ``lim``.
        * ``cgiave`` -- array with, per data row, the mean of the values
          above ``lim`` whose gene name appears in the ``CPG`` list.
        * ``libs`` -- list of the row labels, in input order.

        ``np.mean`` of an empty selection yields ``nan``, so a row with no
        qualifying value produces ``nan`` in the corresponding position.

    The number of rows and, for each row, the number of selected genes and
    of listed genes among them are printed to stdout.
    """
    # A set gives constant-time membership tests; the names are looked up
    # once per gene per row below.
    cgi_names = set(record[0] for record in read.read_dat(CPG, '\t'))

    genes = np.array(RPKM[0][1:])
    libs = [row[0] for row in RPKM[1:]]
    mat = analyse.data2arr([row[1:] for row in RPKM[1:]])

    allave = []
    cgiave = []
    print(len(mat))
    for i in range(len(mat)):
        above = mat[i, :] > lim  # mask computed once, reused for both picks
        gntmp = genes[above]
        tmp = mat[i, :][above]
        allave.append(np.mean(tmp))
        cgirpkm = [value for name, value in zip(gntmp, tmp)
                   if name in cgi_names]
        cgiave.append(np.mean(cgirpkm))
        print('%s %s' % (len(gntmp), len(cgirpkm)))
    return np.array(allave), np.array(cgiave), libs
def CPG_RPKM(RPKM, CPG, lim):
    """Compute per-row means of values above *lim*, for all and listed genes.

    Args:
        RPKM: Sequence of rows. ``RPKM[0][1:]`` are the gene names; each
            later row is ``[label, value, value, ...]`` with one value per
            gene.
        CPG: Argument forwarded to ``read.read_dat`` together with a tab
            separator (presumably a file path -- verify in ``read``). Field
            0 of every record returned is treated as a gene name.
        lim: Lower bound (exclusive) a value must exceed to be averaged.

    Returns:
        ``(allave, cgiave, libs)`` where ``allave[k]`` is the mean of the
        values of data row ``k`` that exceed ``lim``, ``cgiave[k]`` is the
        mean of the subset of those values whose gene is named in the
        ``CPG`` data, and ``libs`` is the list of row labels. Both averages
        are NumPy arrays; an empty selection gives ``nan`` (``np.mean``
        behaviour).

    Side effects:
        Prints the row count, then one line per row with the number of
        genes above ``lim`` and how many of them are in the ``CPG`` list.
    """
    # Build the lookup once as a set so each membership test in the inner
    # loop is a hash lookup instead of a scan over the whole name list.
    cgi_names = set()
    for record in read.read_dat(CPG, '\t'):
        cgi_names.add(record[0])

    genes = np.array(RPKM[0][1:])
    libs = []
    rows = []
    for entry in RPKM[1:]:
        libs.append(entry[0])
        rows.append(entry[1:])
    mat = analyse.data2arr(rows)

    allave = []
    cgiave = []
    print(len(mat))
    for i in range(len(mat)):
        selected = mat[i, :] > lim  # evaluate the threshold mask only once
        gntmp = genes[selected]
        tmp = mat[i, :][selected]
        allave.append(np.mean(tmp))

        cgirpkm = []
        for name, value in zip(gntmp, tmp):
            if name in cgi_names:
                cgirpkm.append(value)
        cgiave.append(np.mean(cgirpkm))
        print('%s %s' % (len(gntmp), len(cgirpkm)))

    allave = np.array(allave)
    cgiave = np.array(cgiave)
    return allave, cgiave, libs
def all_corr(vec):
    """Return column z-scores, row-wise Pearson matrix and condensed distances.

    Args:
        vec: Sequence of equal-length numeric rows; converted to a NumPy
            array with ``np.array`` before processing.

    Returns:
        ``(norm_t, cor_mat, distance)``:

        * ``norm_t`` -- the ``.T`` of ``analyse.data2arr`` applied to the
          list of ``ss.zscore`` results, one per column of *vec*
          (presumably shaped like *vec*; verify in ``analyse``).
        * ``cor_mat`` -- 2-D array whose ``[i][j]`` entry is element 0 of
          ``ss.pearsonr(vec[i], vec[j])``, computed on the raw rows.
        * ``distance`` -- 1-D array of the ``1 - cor_mat`` entries that lie
          strictly above the diagonal, in row-major order.

    Side effects:
        Prints the column count (twice), the row and column counts, a
        progress message, and the length of ``distance`` to stdout.
    """
    ln = len(vec[0])
    print('%s %s' % (ln, ln))
    print('%s %s' % (len(vec), len(vec[0])))
    vec = np.array(vec)

    norm = []
    for col in range(ln):
        norm.append(ss.zscore(vec[:, col]))
    norm = analyse.data2arr(norm)

    # NOTE(review): the text below mentions "spearman" although the matrix
    # is filled from ss.pearsonr; the emitted message is kept as it was.
    print('spearman ...')
    n_rows = len(vec)
    cor_mat = []
    for a in range(n_rows):
        cor_mat.append([ss.pearsonr(vec[a], vec[b])[0]
                        for b in range(n_rows)])
    cor_mat = np.array(cor_mat)

    dist = 1 - cor_mat
    # Gather the strict upper triangle. Appending in place avoids rebuilding
    # the whole accumulated list once per row.
    distance = []
    for i, row in enumerate(dist):
        distance.extend(row[i + 1:])
    print(len(distance))
    distance = np.array(distance)
    return norm.T, cor_mat, distance
def data2matrix(T, NA):
    """Split a labelled table into column labels, row labels and a matrix.

    Args:
        T: Table as a list of mutable rows. The first row is the header;
            every following row is a label followed by values convertible
            with ``float``. **Modified in place**: every cell equal to
            ``'NA'`` or ``'N/A'`` (in any row, including the header and the
            label column) is overwritten with ``NA``.
        NA: Replacement value for the ``'NA'`` / ``'N/A'`` placeholders; it
            must itself be accepted by ``float`` when it lands in a value
            cell.

    Returns:
        ``(x, y, mat)``:

        * ``x`` -- header cells from index 1 onwards, copied before the
          placeholder replacement (placeholders in the header stay as-is
          in ``x``).
        * ``y`` -- first cell of every data row, after replacement.
        * ``mat`` -- result of ``analyse.data2arr`` on the float-converted
          value cells of the data rows.

    Raises:
        ValueError: If a value cell cannot be converted by ``float``.
    """
    x = T[0][1:]

    # Scan each row over its own length so ragged tables are handled too:
    # a shorter row no longer raises IndexError and cells beyond the header
    # width are still cleaned before float() sees them.
    for row in T:
        for j, cell in enumerate(row):
            if cell == 'NA' or cell == 'N/A':
                row[j] = NA

    y = []
    mat = []
    for row in T[1:]:
        y.append(row[0])
        # Explicit list: map() would be a lazy iterator on Python 3.
        mat.append([float(cell) for cell in row[1:]])
    mat = analyse.data2arr(mat)
    return x, y, mat