def run_snmf(V):
    """
    Fit both sparse-NMF variants (SNMF/R, then SNMF/L) to *V* and report
    statistics for each fit via ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factor_rank = 10
    # (seed, version) pairs: right-sparse first, then left-sparse.
    for init_seed, variant in (("random_c", 'r'), ("random_vcol", 'l')):
        model = nimfa.Snmf(V, seed=init_seed, rank=factor_rank, max_iter=12,
                           version=variant, eta=1., beta=1e-4, i_conv=10,
                           w_min_change=0)
        print_info(model())
def one_run(iterador, df, rank, type_nmf):
    """
    Run the requested NMF variant once per element of *iterador*.

    :param iterador: iterable of run identifiers (dict keys of the result).
    :param df: data frame whose transpose is factorized.
    :param rank: factorization rank.
    :param type_nmf: one of "nmf", "snmf", "nsnmf".
    :return: dict mapping run id -> dict with summary stats, iteration
        count, distance, and the normalized basis/coefficient factors.
    """
    data = np.transpose(np.array(df))
    results = {}
    for run_id in iterador:
        if type_nmf == "nmf":
            model = nimfa.Nmf(data, rank=rank, seed="random_vcol",
                              max_iter=1000000, update='divergence',
                              objective='conn', conn_change=40)
        if type_nmf == "snmf":
            model = nimfa.Snmf(data, rank=rank, seed="random_vcol",
                               max_iter=1000000, conn_change=40, version='l')
        if type_nmf == "nsnmf":
            model = nimfa.Nsnmf(data, rank=rank, seed="random_vcol",
                                max_iter=1000000, objective='conn',
                                conn_change=40)
        fit = model()
        full_summary = fit.summary()
        # keep only the quality metrics of interest
        kept = {key: full_summary[key] for key in
                ('connectivity', 'euclidean', 'evar', 'kl', 'rss',
                 'sparseness')}
        basis_frame = pd.DataFrame(fit.basis())
        normed = extract_norm(basis_frame)
        results[run_id] = {
            'summary': kept,
            'n_iter': fit.n_iter,
            'distance': fit.distance,
            'basis': normed['P'],
            'coef': pd.DataFrame(normed['R'] * fit.coef()),
        }
    return results
def factorize(data):
    """
    Run SNMF/L on the S. cerevisiae FunCat annotated sequence data set
    (D1 FC seq) and attach the resulting factors to the data dict.

    :param data: Transformed data set containing attributes' values, class
        information and possibly additional meta information.
    :type data: `tuple`
    :return: *data* with entries ``'W'`` (basis) and ``'H'`` (mixture) added.
    """
    target = data['attr']
    model = nimfa.Snmf(target, seed="random_vcol", rank=40, max_iter=5,
                       version="l", eta=1., beta=1e-4, i_conv=10,
                       w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d"
          % (model, model.seed, model.rank))
    result = model()
    sparse_w, sparse_h = result.fit.sparseness()
    print("""Stats:
    - iterations: %d
    - KL Divergence: %5.3f
    - Euclidean distance: %5.3f
    - Sparseness basis: %5.3f, mixture: %5.3f""" % (
        result.fit.n_iter, result.distance(),
        result.distance(metric='euclidean'), sparse_w, sparse_h))
    data['W'] = result.basis()
    data['H'] = result.coef()
    return data
def param_sweep_rss(V, k_range, beta_range):
    """
    Grid-search SNMF/R over rank and sparsity settings, recording RSS.

    :param V: nonnegative data matrix to factorize.
    :param k_range: array of candidate ranks.
    :param beta_range: array of candidate sparsity weights (beta).
    :return: dict with key ``'parameter_space'`` -> (len(k_range),
        len(beta_range)) array of residual sums of squares.
    """
    k_length = k_range.size
    beta_length = beta_range.size
    parameter_space = np.zeros((k_length, beta_length))
    # eta scales the basis-norm penalty; max(V)^2 follows the nimfa examples
    eta = np.max(V) ** 2
    for ii in np.arange(k_length):
        for jj in np.arange(beta_length):
            print(jj)
            k = k_range[ii]
            beta = beta_range[jj]
            # BUG FIX: the factorization previously ran on an undefined
            # name ``c``; it must use the input matrix ``V``.
            snmf = nimfa.Snmf(V, seed="nndsvd", rank=k, max_iter=30,
                              version='r', eta=eta, beta=beta, i_conv=10,
                              w_min_change=0)
            fctr_res = snmf()
            parameter_space[ii, jj] = fctr_res.fit.rss()
        print(ii)
    return {'parameter_space': parameter_space}
def factorize(V):
    """
    Fit an SNMF/R model to the sparse MovieLens data matrix *V* and print
    summary statistics of the fit.

    :param V: The MovieLens data matrix.
    :type V: `numpy.matrix`
    :return: tuple (basis matrix, mixture matrix) of the fitted model.
    """
    model = nimfa.Snmf(V, seed="random_vcol", rank=30, max_iter=30,
                       version='r', eta=1., beta=1e-4, i_conv=10,
                       w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d"
          % (model, model.seed, model.rank))
    result = model()
    sparse_w, sparse_h = result.fit.sparseness()
    stats_template = """Stats:
    - iterations: %d
    - Euclidean distance: %5.3f
    - Sparseness basis: %5.3f, mixture: %5.3f"""
    print(stats_template % (result.fit.n_iter,
                            result.distance(metric='euclidean'),
                            sparse_w, sparse_h))
    return result.basis(), result.coef()
def getWH(self, x_input, rank=10):
    """
    Factorize *x_input* with SNMF/R and return the two factor matrices.

    :param x_input: nonnegative matrix to factorize.
    :param rank: factorization rank (default 10).
    :return: tuple (W, H) — basis and coefficient matrices.
    """
    model = nimfa.Snmf(x_input, seed="random_c", rank=rank, max_iter=12,
                       version='r', eta=1., beta=1e-4, i_conv=10,
                       w_min_change=0)
    # running the model stores W/H on the model object itself
    model()
    return model.basis(), model.coef()
def cons_sig(df):
    """
    Rank-1 SNMF/R of *df* from a fixed random initialization.

    :param df: data frame / 2-D array to factorize.
    :return: the fitted rank-1 basis matrix W.
    """
    rank_one = 1
    # fixed seed so the initial factors are reproducible
    np.random.seed(0)
    init_w = np.random.random((df.shape[0], rank_one))
    init_h = np.random.random((rank_one, df.shape[1]))
    model = nimfa.Snmf(np.matrix(df), rank=rank_one, beta=0.0, max_iter=1000,
                       W=init_w, H=init_h, version='r',
                       min_residuals=0.0000001)
    fitted = model()
    return fitted.fit.W
def separate_stains_xu_snmf(im_sda, w_init=None, beta=0.2):
    """Compute the stain matrix for color deconvolution with SNMF
    (sparse non-negative matrix factorization).

    Parameters
    ----------
    im_sda : array_like
        Image (MxNx3) or matrix (3xN) in SDA space for which to compute the
        stain matrix.
    w_init : array_like, default is None
        Initial value for the stain matrix.  If not provided, default
        initialization is used.
    beta : float
        Regularization factor for the sparsity of the deconvolved pixels.

    Returns
    -------
    w : array_like
        A 3x3 matrix of stain column vectors.

    Note
    ----
    All input pixels are used in the factorization.

    See Also
    --------
    histomicstk.preprocessing.color_deconvolution.color_deconvolution
    histomicstk.preprocessing.color_deconvolution.separate_stains_macenko_pca

    References
    ----------
    .. [#] Van Eycke, Y. R., et al. (2017). Image processing in digital
       pathology: an opportunity to solve inter-batch variability of
       immunohistochemical staining. Scientific Reports, 7.
    .. [#] Xu, J., et al. (2015). Sparse Non-negative Matrix Factorization
       (SNMF) based color unmixing for breast histopathological image
       analysis. Computerized Medical Imaging and Graphics, 46, 20-29.
    """
    # flatten the image into a pixel matrix and drop non-finite entries
    pixel_matrix = utils.convert_image_to_matrix(im_sda)
    pixel_matrix = utils.exclude_nonfinite(pixel_matrix)
    # with an initial stain matrix, seed H via its pseudo-inverse;
    # otherwise let nimfa initialize at full channel rank
    if w_init is None:
        rank = pixel_matrix.shape[0]
        h_init = None
    else:
        rank = w_init.shape[1]
        h_init = np_linalg.pinv(w_init).dot(pixel_matrix)
    factorization = nimfa.Snmf(pixel_matrix, rank=rank, W=w_init,
                               H=h_init, beta=beta)
    factorization.factorize()
    return htk_linalg.normalize(numpy.array(factorization.W))
def SNMF(X, k=20):
    """
    Approximate *X* by a rank-*k* sparse NMF reconstruction.

    BUG FIX: the factorization previously ran on an undefined global ``X0``
    instead of the parameter ``X``.

    :param X: nonnegative matrix to factorize.
    :param k: factorization rank (default 20).
    :return: the low-rank reconstruction W*H of *X*.
    """
    snmf = nimfa.Snmf(X, rank=k, max_iter=10)
    snmf_fit = snmf()
    return snmf_fit.fitted()
def run_nmf(nSubj, nNodes, k, beta, input_filename, output_filename):
    """
    Factorize an HDF5 'config_matrix' with SNMF/R and save the components.

    Each basis column (upper-triangle vectorized network) is reshaped back
    into a symmetric nNodes x nNodes matrix; the coefficient matrix is saved
    as per-subject time series.

    :param nSubj: number of subjects (unused here; kept for interface
        compatibility with callers).
    :param nNodes: number of network nodes.
    :param k: factorization rank (number of subnetworks).
    :param beta: sparsity weight for SNMF.
    :param input_filename: HDF5 file containing dataset 'config_matrix'.
    :param output_filename: HDF5 file to create with 'subnetworks' and
        'timeseries' datasets.
    """
    triuIdx = np.triu_indices(nNodes, k=1)
    print(input_filename)
    h5f = h5py.File(input_filename, 'r')
    c = h5f['config_matrix']
    print(c.shape)
    # eta scales the basis-norm penalty; max(V)^2 follows the nimfa examples
    eta = np.max(c) ** 2
    snmf = nimfa.Snmf(c, seed="nndsvd", rank=k, max_iter=30, version='r',
                      eta=eta, beta=beta, i_conv=10, w_min_change=0)
    fctr_res = snmf()
    # output matrices
    basis = np.array(fctr_res.basis())
    expr = np.array(fctr_res.coef()).T
    # reshape subnetworks
    # FIX: loop index renamed from ``c`` so it no longer shadows the open
    # HDF5 dataset handle of the same name.
    coactMatr = np.zeros((k, nNodes, nNodes))
    for comp in np.arange(k):
        basisNet = np.zeros((nNodes, nNodes))
        basisNet[triuIdx[0], triuIdx[1]] = basis[:, comp]
        basisNet += basisNet.T
        coactMatr[comp, ...] = basisNet[...]
    # save output
    f = h5py.File(output_filename, 'w')
    f.create_dataset('subnetworks', data=coactMatr)
    f.create_dataset('timeseries', data=expr)
    h5f.close()
    f.close()
def factorize(V, rank_, algorithm='snmf'):
    """
    Factorize *V* at rank *rank_* with SNMF/R.

    :param V: nonnegative matrix to factorize.
    :param rank_: factorization rank.
    :param algorithm: only 'snmf' is supported; any other value returns the
        sentinel pair ``(1, 1)``.
    :return: (basis, coefficient) matrices, or ``(1, 1)``.
    """
    # guard clause: unsupported algorithms fall through to the sentinel
    if algorithm != 'snmf':
        return 1, 1
    model = nimfa.Snmf(V, seed="random_vcol", rank=rank_, max_iter=30,
                       version='r', eta=1., beta=1e-4, i_conv=10,
                       w_min_change=0)
    print("Algorithm: %s\nInitialization: %s\nRank: %d"
          % (model, model.seed, model.rank))
    result = model()
    sparse_w, sparse_h = result.fit.sparseness()
    print("""Stats:- iterations: %d
    - Euclidean distance: %5.3f
    - Sparseness basis: %5.3f, mixture: %5.3f"""
          % (result.fit.n_iter, result.distance(metric='euclidean'),
             sparse_w, sparse_h))
    return result.basis(), result.coef()
def run(self, output_file):
    # Factorize self.mat with the nimfa algorithm named by self.method, then
    # rewrite output_file in place: each input line "<row> <col> ..." becomes
    # "<row> <col> <reconstructed value>".
    # NOTE(review): Python 2 code (print statements).
    print "Running non-negative MF....", strftime(
        "%Y-%m-%d %H:%M:%S", gmtime())
    if self.method == 'nmf':
        modelnmf = nimfa.Nmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "lfnmf":
        modelnmf = nimfa.Lfnmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "nsnmf":
        modelnmf = nimfa.Nsnmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "pmf":
        modelnmf = nimfa.Pmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "psmf":
        modelnmf = nimfa.Psmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "snmf":
        modelnmf = nimfa.Snmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "sepnmf":
        modelnmf = nimfa.Sepnmf(self.mat, rank=self.rank, max_iter=self.iter)
    else:
        print "No model is being recognized, stopped."
        sys.exit(1)
    model = modelnmf()
    # fitted() is the dense low-rank reconstruction of the input matrix
    self.result = np.array(model.fitted())
    print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    print "Write results to file.", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # read all triples, truncate, and rewrite with reconstructed values
    with open(output_file, "r+") as file:
        query = file.readlines()
        file.seek(0)
        file.truncate()
        for line in query:
            # NOTE(review): ``list`` shadows the builtin; kept as-is
            list = line.split()
            newline = "%s %s %f\n" % (
                list[0], list[1], self.result[int(list[0])][int(list[1])]
            )
            file.write(newline)
def predict(self, X): """ :param X: with shape (n_pixel, n_band) :return: """ # # Note that X has to reshape to (n_fea., n_sample) # XX = X.transpose() # (n_band, n_pixel) # snmf = nimfa.Snmf(X, seed="random_c", rank=self.n_band) # remain para. default snmf = nimfa.Snmf(X, rank=self.n_band, max_iter=20, version='r', eta=1., beta=1e-4, i_conv=10, w_min_change=0) snmf_fit = snmf() W = snmf.basis() # shape: n_band * k H = snmf.coef() # shape: k * n_pixel # get clustering res. H = np.asarray(H) indx_sort = np.argsort(H, axis=0) # ascend order cluster_res = indx_sort[-1].reshape(-1) # select band selected_band = [] for c in np.unique(cluster_res): idx = np.nonzero(cluster_res == c) center = np.mean(X[:, idx[0]], axis=1).reshape((-1, 1)) distance = np.linalg.norm(X[:, idx[0]] - center, axis=0) band_ = X[:, idx[0]][:, distance.argmin()] selected_band.append(band_) while selected_band.__len__() < self.n_band: selected_band.append(np.zeros(X.shape[0])) bands = np.asarray(selected_band).transpose() return bands
# Parse CLI arguments and load the node-feature matrix (first column is
# dropped — presumably node ids; confirm against the file format).
# NOTE(review): Python 2 code (print statements, xrange).
args = argument_parser.parse_args()
node_feature = args.node_feature
out_prefix = args.output_prefix
out_dir = args.output_dir
refex_features = np.loadtxt(node_feature, delimiter=',')
actual_fx_matrix = refex_features[:, 1:]
n, f = actual_fx_matrix.shape
print 'Number of Features: ', f
print 'Number of Nodes: ', n
# NOTE(review): unused below — beta is hard-coded to 2.0 in the model call
sparsity_threshold = 2.0
# Five independent SNMF/R runs per rank in [20, 29]; each run writes the
# node-role (W) and role-feature (H) factors to text files.
for i in xrange(1, 6):
    for rank in xrange(20, 29 + 1):
        snmf = nimfa.Snmf(actual_fx_matrix, seed="random_vcol", version='r',
                          rank=rank, beta=2.0)
        snmf_fit = snmf()
        G = np.asarray(snmf_fit.basis())
        F = np.asarray(snmf_fit.coef())
        w_out = '%s-%s-%s-nodeRoles.txt' % (rank, i, out_prefix)
        h_out = '%s-%s-%s-roleFeatures.txt' % (rank, i, out_prefix)
        np.savetxt(out_dir + w_out, X=G)
        np.savetxt(out_dir + h_out, X=F)
return Y, X V = np.random.rand(1000, 40) start = current_milli_time() w, h = sparse_right_nmf(V, rank=15, max_iters=10, beta=2.0) end = current_milli_time() # w, h =nmf(V, k=4, max_iters=30) a = np.abs(V - np.dot(w, h)) c = LA.norm(a, 'fro') print c, (end - start) start = current_milli_time() snmf = nimfa.Snmf(V, seed="random_vcol", version='r', rank=15, beta=2.0, max_iter=10) snmf_fit = snmf() G = np.asarray(snmf_fit.basis()) F = np.asarray(snmf_fit.coef()) end = current_milli_time() a = np.abs(V - np.dot(G, F)) c = LA.norm(a, 'fro') print c, (end - start)
def NMFAnalysis(filename,Rank,turn=0,strategy="conservative"):
    # Run SNMF on the expression matrix stored in *filename*, write the basis
    # matrix (continuous and binarized), drop redundant components, emit
    # subtype metadata/annotation files, and return the four output paths.
    # NOTE(review): Python 2 code (print statements, string.split,
    # xreadlines, list-returning zip).
    X=[]
    header=[]
    head=0
    # output file paths, keyed by analysis round ``turn``
    exportnam=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_versionr.txt'#+str(Rank)+'.txt'
    export_res=export.ExportFile(exportnam)
    exportnam_bin=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_binary.txt'#+str(Rank)+'.txt'
    export_res1=export.ExportFile(exportnam_bin)
    exportnam_bint=export.findParentDir(filename)+'/NMF/round'+str(turn)+'NMFsnmf_binary_t_.txt'#+str(Rank)+'.txt'
    export_res5=export.ExportFile(exportnam_bint)
    exportnam2=export.findParentDir(filename)+'/SubtypeAnalyses/round'+str(turn)+'Metadata.txt'#+str(Rank)+'.txt'
    export_res2=export.ExportFile(exportnam2)
    exportnam3=export.findParentDir(filename)+'/SubtypeAnalyses/round'+str(turn)+'Annotation.txt'#+str(Rank)+'.txt'
    export_res3=export.ExportFile(exportnam3)
    # clustering exports carry an extra leading column before the data
    if 'Clustering' in filename:
        count=1
        start=2
    else:
        count=0
        start=1
    print filename
    # parse the tab-delimited matrix; non-numeric cells fall back to the
    # row median
    for line in open(filename,'rU').xreadlines():
        line=line.rstrip('\r\n')
        q= string.split(line,'\t')
        if head >count:
            val=[]
            val2=[]
            me=0.0
            for i in range(start,len(q)):
                try:
                    val2.append(float(q[i]))
                except Exception:
                    continue
            me=np.median(val2)
            for i in range(start,len(q)):
                try:
                    val.append(float(q[i]))
                except Exception:
                    val.append(float(me))
            X.append(val)
        else:
            # header row(s): echo into both basis outputs
            export_res1.write(line)
            export_res.write(line)
            export_res1.write("\n")
            export_res.write("\n")
            header=q
            head+=1
            continue
    group=defaultdict(list)
    sh=[]
    X=np.array(X)
    mat=[]
    mat=zip(*X)  # transpose: samples become rows for the factorization
    mat=np.array(mat)
    nmf = nimfa.Snmf(mat,seed="nndsvd", rank=int(Rank), max_iter=20,n_run=10,track_factor=True)
    nmf_fit = nmf()
    W = nmf_fit.basis()
    W=np.array(W)
    H=nmf_fit.coef()
    H=np.array(H)
    sh=W.shape
    export_res3.write("uid\tUID\tUID\n")
    # binarization threshold multiplier: mean + par*std per component
    if int(Rank)==2:
        par=1
    else:
        par=2
    W=zip(*W)
    W=np.array(W)
    sh=W.shape
    Z=[]
    for i in range(sh[0]):
        new_val=[]
        val=W[i,:]
        # components loading on very many (>40) or very few (<3) samples
        # use the mean+std rule; the rest use a fixed 0.1 cutoff
        num=sum(i > 0.10 for i in val)
        if num >40 or num <3:
            compstd=True
        else:
            compstd=False
        me=np.mean(val)
        st=np.std(val)
        #print 'V'+str(i)
        export_res.write('V'+str(i))
        export_res1.write('V'+str(i))
        for j in range(sh[1]):
            if compstd:
                if float(W[i][j])>=float(me+(par*st)):
                    export_res1.write("\t"+str(1))
                    new_val.append(1)
                else:
                    export_res1.write("\t"+str(0))
                    new_val.append(0)
            else:
                if float(W[i][j])>0.1:
                    export_res1.write("\t"+str(1))
                    new_val.append(1)
                else:
                    export_res1.write("\t"+str(0))
                    new_val.append(0)
            export_res.write("\t"+str(W[i][j]))
        Z.append(new_val)
        export_res.write("\n")
        export_res1.write("\n")
    Z=np.array(Z)
    sh=Z.shape
    Z_new=[]
    val1=[]
    Z1=[]
    dellst=[]
    export_res2.write("uid")
    export_res5.write("uid")
    # drop components whose member set is >50% contained inside a larger
    # component; survivors are declared as covariates
    for i in range(sh[0]):
        indices=[]
        val1=Z[i,:]
        sum1=sum(val1)
        flag=False
        indices=[index for index, value in enumerate(val1) if value == 1]
        for j in range(sh[0]):
            val2=[]
            if i!=j:
                val2=Z[j,:]
                sum2=sum([val2[x] for x in indices])
                summ2=sum(val2)
                try:
                    # guards against division by zero for empty components
                    if float(sum2)/float(sum1)>0.5:
                        if summ2>sum1:
                            flag=True
                            #print str(i)
                except Exception:
                    continue
        if flag==False:
            Z1.append(val1)
            export_res2.write("\t"+'V'+str(i))
            export_res5.write("\t"+'V'+str(i))
            export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
    export_res2.write("\n")
    export_res5.write("\n")
    Z1=np.array(Z1)
    Z=Z1
    Z=zip(*Z)
    Z=np.array(Z)
    sh=Z.shape
    print "stringency = ",[strategy]
    # per-sample membership rows: "conservative" keeps raw membership,
    # any other strategy zeroes samples assigned to more than 2 components
    for i in range(sh[0]):
        val1=Z[i,:]
        #print sum(val1)
        #if sum(val)>2:
        if sum(val1)>2:
            val=[0 if x==1 else x for x in val1]
        else:
            val=val1
        me=np.mean(val)
        st=np.std(val)
        export_res2.write(header[i+1])
        export_res5.write(header[i+1])
        for j in range(sh[1]):
            if strategy=="conservative":
                #print header[i+1]
                export_res2.write("\t"+str(val1[j]))
                export_res5.write("\t"+str(val1[j]))
            else:
                #print header[i+1]
                export_res2.write("\t"+str(val[j]))
                export_res5.write("\t"+str(val[j]))
        export_res2.write("\n")
        export_res5.write("\n")
        Z_new.append(val)
    Z_new=zip(*Z_new)
    Z_new=np.array(Z_new)
    sh=Z_new.shape
    export_res5.close()
    Orderedheatmap.Classify(exportnam_bint)
    return exportnam,exportnam_bin,exportnam2,exportnam3
# Fit the (externally configured) sklearn-style NMF model to the spectrogram
# matrix and summarize the learnt weights.
NMF_power.fit(spec_matrix)
NM_power_weights = NMF_power.components_
NM_power_basis = NMF_power.transform(spec_matrix)
NM_weights_desc = pd.DataFrame(NM_power_weights.transpose()).describe()
print NM_weights_desc
print datetime.now() - startTime  # prints execution time of the cell: 4mins for 1 hour spec
#%% TRY nimfa methods.factorization.snmf
#### sparse nonnegative matrix factorisation
snmf = nimfa.Snmf(spec_matrix, seed="random_vcol", rank=40, max_iter=20,
                  version='r', eta=1., beta=1e-4, i_conv=10, w_min_change=0)
snmf_fit = snmf()
#%% Looks better - sparse at least
SNMF_basis = snmf_fit.basis()
SNMF_weights = snmf_fit.coef()
SNMF_weights_desc = pd.DataFrame(SNMF_weights.transpose()).describe()
print SNMF_weights_desc
# with rank = 10 and max_iter = 12, bad results - most components just 0
#%%
import numpy as np
import nimfa

# Minimal SNMF/R demo: factorize a random 40x100 matrix at rank 10.
target = np.random.rand(40, 100)
model = nimfa.Snmf(target, seed="random_c", rank=10, max_iter=12,
                   version='r', eta=1., beta=1e-4, i_conv=10,
                   w_min_change=0)
fit_result = model()
def NMFAnalysis(expressionInputFile,NMFinputDir,Rank,platform,iteration=0,strategy="conservative"):
    # SNMF-based subtype discovery: factorizes the expression matrix read
    # from *NMFinputDir*, writes basis/binary/metadata/annotation files
    # under the NMF-SVM output tree, and returns the four main output paths.
    # Non-PSI platforms use hard (max-loading) assignment; PSI uses
    # threshold binarization plus redundant-component removal.
    # NOTE(review): Python 2 code (print, string.split, xreadlines,
    # list-returning zip).
    root_dir = export.findParentDir(NMFinputDir)[:-1]
    if 'ExpressionInput' in root_dir:
        root_dir = export.findParentDir(root_dir)
    if 'NMF-SVM' in root_dir:
        root_dir = export.findParentDir(root_dir)
    export.findFilename(NMFinputDir)
    X=[]
    header=[]
    head=0
    # output paths, keyed by round ``iteration`` and rank
    exportnam=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_versionr'+str(Rank)+'.txt'
    export_res=export.ExportFile(exportnam)
    exportnam_bin=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_binary'+str(Rank)+'.txt'
    export_res1=export.ExportFile(exportnam_bin)
    exportnam_bint=root_dir+'/NMF-SVM/NMF/round'+str(iteration)+'NMFsnmf_binary_t_'+str(Rank)+'.txt'
    export_res5=export.ExportFile(exportnam_bint)
    MF_input = root_dir+'/NMF-SVM/ExpressionInput/exp.NMF-MarkerFinder.txt'
    export.customFileCopy(expressionInputFile,root_dir+'/NMF-SVM/ExpressionInput/exp.NMF-MarkerFinder.txt')
    export_res4=open(string.replace(MF_input,'exp.','groups.'),"w")
    export_res7=open(string.replace(MF_input,'exp.','comps.'),"w")
    exportnam2=root_dir+'/NMF-SVM/SubtypeAnalyses/round'+str(iteration)+'Metadata'+str(Rank)+'.txt'
    export_res2=export.ExportFile(exportnam2)
    exportnam3=root_dir+'/NMF-SVM/SubtypeAnalyses/round'+str(iteration)+'Annotation'+str(Rank)+'.txt'
    export_res3=export.ExportFile(exportnam3)
    #if 'Clustering' in NMFinputDir:
    #    count=1
    #    start=2
    #else:
    count=0
    start=1
    #print Rank
    # parse tab-delimited input; non-numeric cells fall back to row median
    for line in open(NMFinputDir,'rU').xreadlines():
        line=line.rstrip('\r\n')
        q= string.split(line,'\t')
        if head >count:
            val=[]
            val2=[]
            me=0.0
            for i in range(start,len(q)):
                try:
                    val2.append(float(q[i]))
                except Exception:
                    continue
            me=np.median(val2)
            for i in range(start,len(q)):
                try:
                    val.append(float(q[i]))
                except Exception:
                    val.append(float(me))
            #if q[1]==prev:
            X.append(val)
        else:
            # header row: echo into both basis outputs
            export_res1.write(line)
            export_res.write(line)
            export_res1.write("\n")
            #export_res4.write(line)
            #export_res4.write("\n")
            export_res.write("\n")
            header=q
            head+=1
            continue
    group=defaultdict(list)
    sh=[]
    X=np.array(X)
    #print X.shape
    mat=[]
    #mat=X
    mat=zip(*X)  # transpose: samples become rows for the factorization
    mat=np.array(mat)
    #print mat.shape
    nmf = nimfa.Snmf(mat,seed="nndsvd", rank=int(Rank), max_iter=20,n_run=1,track_factor=False,theta=0.95)
    nmf_fit = nmf()
    W = nmf_fit.basis()
    W=np.array(W)
    #np.savetxt("basismatrix2.txt",W,delimiter="\t")
    H=nmf_fit.coef()
    H=np.array(H)
    # np.savetxt("coefficientmatrix2.txt",H,delimiter="\t")
    #print W.shape
    sh=W.shape
    export_res3.write("uid\tUID\tUID\n")
    # binarization threshold multiplier (only used on the PSI branch below)
    if int(Rank)==2:
        par=1
    else:
        par=2
    if platform != 'PSI':
        # hard assignment: each sample goes to its maximum-loading component
        sh=W.shape
        Z=[]
        export_res5.write("uid")
        export_res2.write("uid")
        for i in range(sh[1]):
            export_res5.write("\t"+'V'+str(i))
            export_res2.write("\t"+'V'+str(i))
            export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
        export_res5.write("\n")
        export_res2.write("\n")
        export_res3.write("\n")
        for i in range(sh[0]):
            new_val=[]
            val=W[i,:]
            export_res2.write(header[i+1])
            export_res5.write(header[i+1])
            export_res4.write(header[i+1])
            flag=True
            for j in range(sh[1]):
                # ``flag`` guards against ties: only the first maximum wins
                if W[i][j]==max(val) and flag:
                    export_res5.write("\t"+str(1))
                    export_res2.write("\t"+str(1))
                    new_val.append(1)
                    export_res4.write("\t"+str(j+1)+"\t"+'V'+str(j))
                    flag=False
                else:
                    export_res5.write("\t"+str(0))
                    export_res2.write("\t"+str(0))
                    new_val.append(0)
            Z.append(new_val)
            export_res5.write("\n")
            export_res2.write("\n")
            export_res4.write("\n")
        W=zip(*W)
        W=np.array(W)
        sh=W.shape
        Z=zip(*Z)
        Z=np.array(Z)
        # transposed continuous (W) and binary (Z) matrices, one row per
        # component
        for i in range(sh[0]):
            export_res.write('V'+str(i))
            export_res1.write('V'+str(i))
            for j in range(sh[1]):
                export_res.write("\t"+str(W[i][j]))
                export_res1.write("\t"+str(Z[i][j]))
            export_res.write("\n")
            export_res1.write("\n")
        export_res.close()
        export_res1.close()
        export_res2.close()
        export_res5.close()
        Orderedheatmap.Classify(exportnam_bint)
        return exportnam,exportnam_bin,exportnam2,exportnam3
    else:
        # PSI path: threshold-based binarization followed by removal of
        # redundant (mostly-contained) components
        W=zip(*W)
        W=np.array(W)
        sh=W.shape
        Z=[]
        for i in range(sh[0]):
            new_val=[]
            val=W[i,:]
            # very large (>40) or very small (<3) components use the
            # mean+std rule; the rest use a fixed 0.1 cutoff
            num=sum(i > 0.10 for i in val)
            if num >40 or num <3:
                compstd=True
            else:
                compstd=False
            me=np.mean(val)
            st=np.std(val)
            #print 'V'+str(i)
            export_res.write('V'+str(i))
            export_res1.write('V'+str(i))
            for j in range(sh[1]):
                if compstd:
                    if float(W[i][j])>=float(me+(par*st)):
                        export_res1.write("\t"+str(1))
                        new_val.append(1)
                    else:
                        export_res1.write("\t"+str(0))
                        new_val.append(0)
                else:
                    if float(W[i][j])>0.1:
                        export_res1.write("\t"+str(1))
                        new_val.append(1)
                    else:
                        export_res1.write("\t"+str(0))
                        new_val.append(0)
                export_res.write("\t"+str(W[i][j]))
            Z.append(new_val)
            export_res.write("\n")
            export_res1.write("\n")
        # Z=zip(*Z)
        Z=np.array(Z)
        sh=Z.shape
        Z_new=[]
        val1=[]
        Z1=[]
        dellst=[]
        export_res2.write("uid")
        export_res5.write("uid")
        # drop components whose member set is >50% contained in a larger
        # component; survivors are declared as covariates
        for i in range(sh[0]):
            indices=[]
            val1=Z[i,:]
            sum1=sum(val1)
            flag=False
            indices=[index for index, value in enumerate(val1) if value == 1]
            for j in range(sh[0]):
                val2=[]
                if i!=j:
                    val2=Z[j,:]
                    sum2=sum([val2[x] for x in indices])
                    summ2=sum(val2)
                    try:
                        # guards against division by zero for empty sets
                        if float(sum2)/float(sum1)>0.5:
                            if summ2>sum1:
                                flag=True
                                #print str(i)
                    except Exception:
                        continue
            if flag==False:
                Z1.append(val1)
                export_res2.write("\t"+'V'+str(i))
                export_res5.write("\t"+'V'+str(i))
                export_res3.write('V'+str(i)+"\t"+"Covariate"+"\t"+str(1)+"\n")
        export_res2.write("\n")
        export_res5.write("\n")
        Z1=np.array(Z1)
        Z=Z1
        Z=zip(*Z)
        Z=np.array(Z)
        sh=Z.shape
        # per-sample membership rows: "conservative" keeps raw membership,
        # other strategies zero samples assigned to more than 2 components
        for i in range(sh[0]):
            val1=Z[i,:]
            #print sum(val1)
            #if sum(val)>2:
            if sum(val1)>2:
                val=[0 if x==1 else x for x in val1]
            else:
                val=val1
            me=np.mean(val)
            st=np.std(val)
            export_res2.write(header[i+1])
            export_res5.write(header[i+1])
            for j in range(sh[1]):
                if strategy=="conservative":
                    export_res2.write("\t"+str(val1[j]))
                    export_res5.write("\t"+str(val1[j]))
                else:
                    export_res2.write("\t"+str(val[j]))
                    export_res5.write("\t"+str(val[j]))
            export_res2.write("\n")
            export_res5.write("\n")
            Z_new.append(val)
        Z_new=zip(*Z_new)
        Z_new=np.array(Z_new)
        sh=Z_new.shape
        export_res5.close()
        Orderedheatmap.Classify(exportnam_bint)
        # NOTE(review): both branches below return the same tuple
        if strategy=="conservative":
            return exportnam,exportnam_bin,exportnam2,exportnam3
        else:
            return exportnam,exportnam_bin,exportnam2,exportnam3
def train(self):
    # Factorize the source rating matrix self.r1, build per-item rating
    # distributions for r1 (NMF-imputed) and r2, match items across the two
    # datasets with the Hungarian algorithm, and write the item-aligned
    # reconstruction via self.writeTrans.
    # NOTE(review): Python 2 code (print statements).
    # Run MF
    print "Running non-negative MF....", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    source_result = None
    if self.method == "nmf":
        modelnmf = nimfa.Nmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "lfnmf":
        modelnmf = nimfa.Lfnmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "nsnmf":
        modelnmf = nimfa.Nsnmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "pmf":
        modelnmf = nimfa.Pmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "psmf":
        modelnmf = nimfa.Psmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "snmf":
        modelnmf = nimfa.Snmf(self.r1, rank=self.rank, max_iter=self.iter)
    elif self.method == "sepnmf":
        modelnmf = nimfa.Sepnmf(self.r1, rank=self.rank, max_iter=self.iter)
    else:
        print "No model is being recognized, stopped."
        sys.exit(1)
    model = modelnmf()
    # dense low-rank reconstruction used to impute missing (zero) ratings
    source_result = np.array(model.fitted())
    print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # Turn vector of per user into distribution
    # And calculate the dot similarity
    # Then find the best data
    print("Transfer user vector into distribution.", strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    # item_pdf1: 11-bin histogram of (imputed) r1 ratings per item
    item_pdf1 = []
    for i in range(N_ITEM):
        count = 0
        pdf = np.zeros(11)
        for j in range(N_USER):
            t = self.r1[i][j]
            if t == 0.0:
                t = source_result[i][j]
            # ignore the count if it is 0.
            if t < 1e-4:
                continue
            idx = min(int(math.floor(t / 0.1)), 10)
            pdf[idx] += 1
            count += 1
        if count > 1:
            pdf = pdf / count
        # print count
        item_pdf1.append(pdf)
    # item_pdf2: same histogram for observed r2 ratings
    item_pdf2 = []
    for i in range(N_ITEM):
        count = 0
        pdf = np.zeros(11)
        for j in range(N_USER):
            if self.r2[i][j] > 0:
                count += 1
                pdf[int(math.floor(self.r2[i][j] / 0.1))] += 1
        if count > 1:
            pdf = pdf / count
        item_pdf2.append(pdf)
    # Transform now for further use: matrix[user]
    # self.r1 = self.r1.T
    # self.r2 = self.r2.T
    print "Calculate cost matrix....", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # Calculate cost matrix for items
    # matrix[item r1][item r2]
    # Uses 5 threads to run this slowest part.
    partition = 5
    matrix = [[] for i in range(partition)]
    threads = []
    ll = np.split(np.array(range(N_ITEM)), partition)
    for index in range(partition):
        # each worker fills its own sub-list, so no lock is needed here
        thread = Thread(target=self.threadFunc, args=(matrix[index], ll[index], item_pdf1, item_pdf2))
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()
    matrix = np.array(np.concatenate(matrix, axis=0))
    print "Matrix shape: ", matrix.shape
    print "Hungarian running maximum matching....", strftime(
        "%Y-%m-%d %H:%M:%S", gmtime())
    match1to2, match2to1 = hungarian.lap(matrix)
    print "End of matching!", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # Create item-matching version
    # trans[item in r2]
    trans = []
    for item2 in range(N_ITEM):
        trans.append(source_result[match2to1[item2]])
    trans = np.array(trans).T
    # Find most similar user pair
    print "Find most similar user pair..... Write file...", strftime(
        "%Y-%m-%d %H:%M:%S", gmtime())
    self.writeTrans(trans)
    print "Done, enter cpp mode", strftime("%Y-%m-%d %H:%M:%S", gmtime())
def sparse_color_deconvolution(im_rgb, w_init, beta):
    """Performs adaptive color deconvolution.

    Uses sparse non-negative matrix factorization to adaptively deconvolve a
    given RGB image into intensity images representing distinct stains.
    The input RGB image `im_rgb` is first transformed into optical density
    space as a row-matrix, then decomposed as :math:`V = W H` where
    :math:`W` is a 3xK matrix of stain vectors (columns) and :math:`H` is a
    K x m*n matrix of per-pixel stain concentrations.  Sparsity of the
    columns of :math:`H` is encouraged so most pixels carry significant
    contributions from at most one stain.  Can use a hot-start
    initialization from a color-deconvolution matrix.

    Parameters
    ----------
    im_rgb : array_like
        An RGB image of type unsigned char, or a 3xN matrix of RGB pixel
        values.
    w_init : array_like
        A 3xK matrix containing the color vectors in columns.  Should not be
        complemented with ComplementStainMatrix for sparse decomposition to
        work correctly.  May be None for default initialization.
    beta : double
        Regularization factor for sparsity of :math:`H` - recommended 0.5.

    Returns
    -------
    Unmixed : namedtuple
        ``Stains`` — image with deconvolved stain intensities per channel,
        values in [0, 255]; ``W`` — the final 3 x K stain matrix.

    See Also
    --------
    histomicstk.preprocessing.color_deconvolution.ColorDeconvolution

    References
    ----------
    .. [1] J. Xu et al., "Sparse Non-negative Matrix Factorization (SNMF)
       based color unmixing for breast histopathological image analysis,"
       Computerized Medical Imaging and Graphics, vol.46, pp.20-9, 2015.
    """
    # determine if input is RGB or pixel-matrix format
    if len(im_rgb.shape) == 3:
        # RGB(A) image provided
        m = im_rgb.shape[0]
        n = im_rgb.shape[1]
        # BUG FIX: this alpha-channel strip previously sat in the 2-D
        # branch, where ``im_rgb.shape[2]`` always raised IndexError; it
        # must run here, before the 3-channel reshape.
        if im_rgb.shape[2] == 4:
            im_rgb = im_rgb[:, :, (0, 1, 2)]
        im_rgb = np.reshape(im_rgb, (m * n, 3)).transpose()
    elif len(im_rgb.shape) == 2:
        # pixel matrix provided; sentinel dims mean "skip image reshape"
        m = -1
        n = -1
    # transform input RGB to optical density values (zeros would make the
    # log transform blow up, hence the epsilon substitution)
    im_rgb = im_rgb.astype(dtype=np.float32)
    im_rgb[im_rgb == 0] = 1e-16
    ODfwd = color_conversion.rgb_to_od(im_rgb)
    if w_init is None:
        # set number of output stains and factorize without initialization
        K = 3
        Factorization = nimfa.Snmf(V=ODfwd, seed=None, rank=K,
                                   version='r', beta=beta)
        Factorization()
    else:
        # get number of output stains
        K = w_init.shape[1]
        # normalize stains to unit-norm
        for i in range(K):
            Norm = np.linalg.norm(w_init[:, i])
            if (Norm >= 1e-16):
                w_init[:, i] /= Norm
            else:
                print('error')  # TODO: raise ValueError for degenerate stain
        # estimate initial H given w_init
        Hinit = np.dot(np.linalg.pinv(w_init), ODfwd)
        Hinit[Hinit < 0] = 0
        # perform regularized NMF from the hot start
        Factorization = nimfa.Snmf(V=ODfwd, seed=None, W=w_init, H=Hinit,
                                   rank=K, version='r', beta=beta)
        Factorization()
    # extract solutions and make columns of "w" unit-norm, rescaling H to
    # preserve the product W*H
    w = np.asarray(Factorization.basis())
    H = np.asarray(Factorization.coef())
    for i in range(K):
        Norm = np.linalg.norm(w[:, i])
        w[:, i] /= Norm
        H[i, :] *= Norm
    # reshape H matrix to image when image dimensions are known
    if m == -1:
        stains_float = np.transpose(H)
    else:
        stains_float = np.reshape(np.transpose(H), (m, n, K))
    # clip to the displayable [0, 255] range and convert type
    stains = np.copy(stains_float)
    stains[stains > 255] = 255
    stains = stains.astype(np.uint8)
    # build named tuple for outputs
    Unmixed = collections.namedtuple('Unmixed', ['Stains', 'W'])
    Output = Unmixed(stains, w)
    return Output
def factorization(V, list_of_arrays):
    """
    Run a rank-4 SNMF/R on *V* and publish the resulting factors as
    module-level globals named by the first two entries of *list_of_arrays*.

    :param V: nonnegative matrix to factorize.
    :param list_of_arrays: names for the basis (index 0) and coefficient
        (index 1) globals.
    """
    model = nimfa.Snmf(V, seed="random_c", rank=4, max_iter=20,
                       version='r', eta=1.)
    model()  # factorization stores W/H on the model object
    globals()[list_of_arrays[0]] = model.basis()
    globals()[list_of_arrays[1]] = model.coef()
# Sweep SNMF rank k = 1..20 for two sparsity settings (beta = 0.0 and 0.1),
# recording sparseness, explained variance, KL divergence, a custom error
# and RSS into result matrices (spsh, spsw, evals, kl, bet, rss) that are
# allocated elsewhere.  ``data`` and ``D`` also come from outside this
# snippet; the (40320, 285) initialization assumes data's shape — TODO
# confirm.
H_min_mat = np.zeros([20, 2])
W_min_mat = np.zeros([20, 2])
for b in range(0, 2):
    for t in range((20)):
        k = t + 1
        print(k)
        # fixed seed so every (k, beta) cell starts from the same init
        np.random.seed(0)
        w_r = np.random.random((40320, k))
        h_r = np.random.random((k, 285))
        betaa = (b) / 10.0
        snmf = nimfa.Snmf(np.matrix(data), rank=k, beta=betaa, max_iter=1000,
                          W=w_r, H=h_r, version='r', eta=1.,
                          min_residuals=0.0001)
        snmf_fit = snmf()
        W = snmf_fit.fit.W
        H = snmf_fit.fit.H
        spsh[t, b] = snmf_fit.fit.sparseness()[1]
        spsw[t, b] = snmf_fit.fit.sparseness()[0]
        evals[t, b] = snmf_fit.fit.evar()
        kl[t, b] = snmf_fit.distance(metric='kl')
        bet[t, b] = calculate_error(D, W, H)
        rss[t, b] = snmf_fit.fit.rss()
        #W_min_mat[t,b] = np.matrix.min(W)
def nmf_init(mat: np.ndarray, num_clusters: int):
    """
    Derive an initial cluster-membership matrix from an SNMF basis.

    BUG FIX: the signature previously read ``num_clusters=int``, defaulting
    the parameter to the *type* ``int`` instead of annotating it; any call
    relying on the default would have passed a type object as the rank.

    :param mat: nonnegative matrix to factorize.
    :param num_clusters: factorization rank (number of clusters).
    :return: basis matrix binarized (nonzero -> 1.0) as floats.
    """
    nmf = nimfa.Snmf(mat, rank=num_clusters)
    nmf_fit = nmf()
    # bool cast marks nonzero loadings; float cast yields a 0/1 matrix
    return nmf_fit.basis().astype(bool).astype(float)