def run_nmf(V, rank=12, max_iter=5000):
    """
    Run standard nonnegative matrix factorization twice on the same target:
    once with Euclidean update rules (Frobenius objective) and once with
    divergence update rules (KL objective), printing fit info for each.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    :param rank: Factorization rank.
    :param max_iter: Maximum number of iterations per run.
    """
    # Euclidean updates, Frobenius-norm objective.
    euclid_model = nimfa.Nmf(V, seed="random_vcol", rank=rank, max_iter=max_iter,
                             update='euclidean', objective='fro')
    print_info(euclid_model())

    # Divergence updates, KL-divergence objective.
    div_model = nimfa.Nmf(V, seed="random_vcol", rank=rank, max_iter=max_iter,
                          initialize_only=True, update='divergence', objective='div')
    return print_info(div_model())
def generateRssPlot(spectra_data): data = np.transpose(spectra_data.values) #Reorganizes each column of the data matrix permutated_data = permuteColumns(data) #Range of numbers from 30-100 with interval of 2 - change this to change the k-values to test k = np.arange(30, 100, 2) rss = [] #Loops through each k value and creates an NMF model for both the permutated data and original data #Adds the ratio between the two rss values to an array for x in k: nmf_model = nimfa.Nmf(data, rank=x) nmf_model() data_rss = nmf_model.rss() permutation_model = nimfa.Nmf(permutated_data, rank=x) permutation_model() permutation_rss = permutation_model.rss() rss.append(data_rss/permutation_rss) print(k) print(rss) ax = graphSetup("MassSpectra RSS Plot", "K Value (# of Basis Vectors)", "Ratio Between RSS Value", [np.min(k), np.max(k)], [int(np.min(rss)), mt.ceil(np.max(rss))]) rss_plt = ax.plot(k, rss) savePlot()
def NMFRun(M_run, args, projdir, samples, subtypes_dict):
    """
    Run divergence-objective NMF on the mutation matrix M_run.

    If args.rank > 0, factorize at that fixed rank.  If args.rank == 0, scan
    ranks 1..5 and stop when the gain in explained variance between successive
    ranks falls below 0.001 (after rank 2), refitting at the previous rank.
    Returns a namedtuple Out(W, H) with the basis and coefficient matrices.

    NOTE(review): projdir, samples and subtypes_dict are unused here —
    presumably kept for interface parity with callers; verify.
    """
    if args.rank > 0:
        if args.verbose:
            eprint("Running NMF with rank =", args.rank)
        model = nimfa.Nmf(M_run, rank=args.rank, update="divergence",
                          objective='div', n_run=1, max_iter=200)
        model_fit = model()
        evar = model_fit.fit.evar()
        maxind = args.rank
    elif args.rank == 0:
        if args.verbose:
            eprint("Finding optimal rank for NMF...")
        evarprev = 0
        for i in range(1,6):
            model = nimfa.Nmf(M_run, rank=i, update="divergence",
                              objective='div', n_run=1, max_iter=200)
            model_fit = model()
            evar = model_fit.fit.evar()
            if args.verbose:
                eprint("Explained variance for rank " + str(i) + ":", evar)
            # if evar > 0.8:
            # stop once extra rank adds <0.001 explained variance (ranks >2 only)
            if(i > 2 and evar - evarprev < 0.001):
                if args.verbose:
                    eprint(textwrap.dedent("""\
                        Stopping condition met: <0.1 percent difference
                        in explained variation between ranks
                        """))
                # refit at the previous rank, which was "good enough"
                model = nimfa.Nmf(M_run, rank=i-1, update="divergence",
                                  objective='div', n_run=1, max_iter=200)
                model_fit = model()
                break
            evarprev = evar
    W = model_fit.basis()
    H = model_fit.coef()
    out = collections.namedtuple('Out', ['W', 'H'])(W, H)
    return out
def extract(genomes, totalIterationsPerCore, numberSignaturesToExtract, WPerCore,
            HPerCore, genomeErrorsPerCore, genomesReconstructedPerCore):
    """
    Extract mutational signatures from bootstrapped genome matrices.

    For each iteration: bootstrap the genomes, run NMF, normalize each basis
    column (and scale the matching coefficient row to compensate), and record
    reconstruction error, reconstruction, and the stacked W / H factors into
    the caller-provided output arrays (mutated in place).

    :param genomes: mutation-types x genomes count matrix.
    :param totalIterationsPerCore: number of bootstrap/factorization rounds.
    :param numberSignaturesToExtract: NMF rank per round.
    """
    # Bug fix: the original read size(data, ...) where `data` is undefined in
    # this scope; the intended source is the `genomes` parameter.
    totalMutationTypes = size(genomes, 0)
    totalGenomes = size(genomes, 1)
    processCount = 0
    for i in range(totalIterationsPerCore):
        #replacing zeroes w small number to avoid underflow
        bootstrapGenomes = numpy.maximum(bootstrapCancerGenomes(genomes),
                                         numpy.finfo(numpy.float32).eps)
        nmf = nimfa.Nmf(bootstrapGenomes, max_iter=MAX_ITER,
                        rank=numberSignaturesToExtract,
                        update=UPDATE_EQUATION, objective=OBJECTIVE_FUNC,
                        conn_change=CONN_CHANGE,
                        test_conv=TEST_CONV)  # max iter is actual 1 mill
        nmf_fit = nmf()
        for j in range(numberSignaturesToExtract):
            # L1-normalize basis column j; rescale coef row j so W*H is unchanged
            total = sum(nmf_fit.basis()[:, j])
            nmf_fit.basis()[:, j] = nmf_fit.basis()[:, j] / total
            nmf_fit.coef()[j, :] = nmf_fit.coef()[j, :] / total
        genomeErrorsPerCore[:, :, i] = bootstrapGenomes - nmf_fit.basis() * nmf_fit.coef()
        genomesReconstructedPerCore[:, :, i] = nmf_fit.basis() * nmf_fit.coef()
        WPerCore[:, processCount:(processCount + numberSignaturesToExtract)] = nmf_fit.basis()
        HPerCore[processCount:(processCount + numberSignaturesToExtract), :] = nmf_fit.coef()
        processCount = processCount + numberSignaturesToExtract
def one_run(iterador, df, rank,type_nmf):
    """
    Run one of three nimfa factorizers repeatedly and collect per-run results.

    :param iterador: iterable of keys; one factorization is run per element.
    :param df: input table; converted to an array and transposed before fitting.
    :param rank: factorization rank.
    :param type_nmf: one of "nmf", "snmf", "nsnmf" selecting the factorizer.
        NOTE(review): any other value leaves `nmf` unbound and raises NameError.
    :return: dict keyed by the elements of `iterador`; each value holds the
        fit summary subset, iteration count, final distance, and the
        normalized basis/coef matrices from extract_norm.
    """
    V=np.array(df)
    V=np.transpose(V)
    D={}
    for i in iterador:
        d={}
        if(type_nmf=="nmf"):
            nmf = nimfa.Nmf(V, rank=rank, seed="random_vcol", max_iter=1000000,
                            update='divergence', objective='conn', conn_change=40)
        if(type_nmf=="snmf"):
            nmf = nimfa.Snmf(V, rank=rank, seed="random_vcol", max_iter=1000000,
                             conn_change=40, version = 'l')
        if(type_nmf=="nsnmf"):
            nmf = nimfa.Nsnmf(V, rank=rank, seed="random_vcol", max_iter=1000000,
                              objective='conn', conn_change=40)
        fit = nmf()
        S=fit.summary()
        # keep only the summary entries of interest
        SS={}
        SS['connectivity']=S['connectivity']
        SS['euclidean']=S['euclidean']
        SS['evar']=S['evar']
        SS['kl']=S['kl']
        SS['rss']=S['rss']
        SS['sparseness']=S['sparseness']
        d['summary']=SS
        d['n_iter']=fit.n_iter
        d['distance']=fit.distance
        # normalize the basis; rescale coef with the returned factor R
        H=pd.DataFrame(fit.basis())
        E=extract_norm(H)
        d['basis']=E['P']
        d['coef']=pd.DataFrame(E['R']*fit.coef())
        D[i]=d
    return D
def factorize(V):
    """
    Perform NMF - Divergence factorization on the sparse Medlars data matrix.

    Return basis and mixture matrices of the fitted factorization model.

    :param V: The Medlars data matrix.
    :type V: `scipy.sparse.csr_matrix`
    """
    model = nimfa.Nmf(V, seed="random_vcol", rank=12, max_iter=15,
                      update="divergence", objective="div")
    print("Algorithm: %s\nInitialization: %s\nRank: %d" % (model, model.seed, model.rank))
    fit = model()

    # Sparseness of the basis (W) and mixture (H) factors.
    sparse_w, sparse_h = fit.fit.sparseness()
    stats = (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean'),
             sparse_w, sparse_h)
    print("""Stats:
            - iterations: %d
            - KL Divergence: %5.3f
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % stats)
    return fit.basis(), fit.coef()
def nnmf(X0, k):
    """
    Factorize X0 with rank-k NMF and return the reconstructed (fitted) matrix.

    :param X0: nonnegative target matrix.
    :param k: factorization rank.
    """
    model = nimfa.Nmf(X0, rank=k, max_iter=10, lambda_w=0.8, lambda_h=0.8)
    fit = model()
    return fit.fitted()
def pick_rank_vis(data, max_factor, title):
    """
    Estimate NMF rank quality over ranks 2..max_factor-1 and plot RSS and the
    cophenetic correlation coefficient on twin y-axes, saving the figure.

    :param data: samples x features matrix; transposed before factorization.
    :param max_factor: exclusive upper bound of the rank range to test.
    :param title: appended to the output file name.
    """
    V = data.T
    nmf = nimfa.Nmf(V, max_iter=1000000, update='euclidean', rank=2, track_error=True)
    r = nmf.estimate_rank(rank_range=range(2, max_factor))
    result_array = []
    for rank, vals in r.items():
        result_array.append([rank, vals['rss'], vals['cophenetic']])
    df = pd.DataFrame(result_array, columns=['rank', 'rss', 'coph'])

    fig, ax1 = plt.subplots()
    plt.xlabel('Number of Kmer signatures')
    ax2 = ax1.twinx()
    ax1.set_ylabel('Cophenetic correlation coefficient', color='lightsalmon')
    ax2.set_ylabel('RSS', color='cadetblue')
    # Bug fix: the original iterated df.iterrows() and redrew the identical
    # full series once per row; plotting each series once gives the same figure.
    ax1.plot(df['rank'], df['coph'], color='lightsalmon')
    ax2.plot(df['rank'], df['rss'], color='cadetblue')
    plt.savefig("/pollard/home/abustion/deep_learning_microbiome/analysis/NMF/alexandrov"
                + str(title) + "_011418.png", bbox_inches='tight', dpi=300)
def alexandrov(data, max_factor, title):
    """
    Alexandrov-style rank selection plot: for each rank, run consensus NMF
    (50 runs) and scatter stability (cophenetic) and reconstruction error
    (RSS) on twin y-axes; save the figure.
    """
    fig, ax1 = plt.subplots()
    plt.xlabel('Number of Kmer signatures')
    ax2 = ax1.twinx()
    ax1.set_ylabel('stability', color = 'red')
    ax2.set_ylabel('reconstruction error', color = 'blue')
    for rank in range(2, max_factor):
        model = nimfa.Nmf(data.T, rank=rank, max_iter=1000, n_run=50,
                          track_factor=True)
        summary = model().summary()
        ax1.scatter(rank, summary['cophenetic'], color = 'r')
        ax2.scatter(rank, summary['rss'], color = 'b')
    plt.savefig("/pollard/home/abustion/deep_learning_microbiome/analysis/NMF/alexandrov"
                + str(title) + ".png")
def RunTumor(datasetname, tumorname, data, mink, maxk, num_iterations, init):
    """
    Consensus-cluster one tumor dataset over ranks [mink, maxk): compute an
    averaged consensus matrix per rank, record its cophenetic correlation,
    pick the most stable rank, refit NMF there, and emit plots.

    :param data: expression table.  # assumes a pandas DataFrame — TODO confirm
    """
    k_cophs = {}
    data = data.values  # .as_matrix() was removed in pandas 1.0
    data = np.matrix.transpose(data)
    for k in range(mink, maxk):
        mat = ComputeAverageConsensusMatrix(data, k, num_iterations, init)
        mat = reorder(mat)
        A = np.asarray(mat)
        k_cophs[k] = coph_cor(A)
        savematrixplot(datasetname, tumorname, A, k)
    print(tumorname, "cophs:", k_cophs)
    savecophcorplot(datasetname, tumorname, k_cophs)
    # most stable rank according to the cophenetic correlations
    rank = evaluateStability(k_cophs)
    nmf = nimfa.Nmf(data, rank=rank, seed="random_vcol", max_iter=200,
                    update='euclidean', objective='conn', conn_change=40)
    nmf_fit = nmf()
    generateHeatPlot(datasetname, tumorname, '', data, nmf_fit.basis(), nmf_fit.coef())
def run_one(V, rank):
    """
    Run standard NMF on leukemia data set. 50 runs of Standard NMF are
    performed and obtained consensus matrix averages all 50 connectivity
    matrices.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.ndarray`
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    n_samples = V.shape[1]
    consensus = np.zeros((n_samples, n_samples))
    for run_idx in range(50):
        model = nimfa.Nmf(V, rank=rank, seed="random_vcol", max_iter=200,
                          update='euclidean', objective='conn', conn_change=40)
        fit = model()
        print("%2d/50 : %s - init: %s (%3d/200 iterations)"
              % (run_idx + 1, fit.fit, fit.fit.seed, fit.fit.n_iter))
        consensus += fit.fit.connectivity()
    consensus /= 50.
    plot(reorder(consensus), rank)
def mf(self,k,max_iter,alpha):
    """
    Build word vectors by projected-gradient refinement of H followed by
    rank-k NMF (Python 2 code).

    Initializes H randomly, refines each row against the sparse co-occurrence
    matrix with a halving step size, factorizes H^T with nimfa, reconstructs
    word vectors as W * H1, filters them to words present in the ESA word-sim
    data, and pickles the result to data/mc_matrix.

    :param k: NMF rank.
    :param max_iter: gradient steps per row of H.
    :param alpha: gradient step scale.
    """
    global matrix
    global W
    vectors = [list(self.model[word]) for word in self.wordList]
    W = np.array(vectors,dtype = np.float32)
    del vectors
    # H: one row per article, columns match W's feature dimension
    H = np.transpose(np.random.rand(W.shape[1],len(articleDict)))
    for i in range(H.shape[0]):
        rate = 1
        v = self.matrix.getrow(i).toarray()
        for j in range(max_iter):
            # gradient of ||W.H[i] - v||^2 w.r.t. H[i], with step halved each pass
            g = W.dot(H[i]) - v
            H[i] -= rate * alpha * np.transpose(W).dot(np.array(g[0]))
            # project back onto the nonnegative orthant
            H[i] = np.array(map((lambda x:max(x,0)),H[i]))
            rate *= 0.5
    print "Begin NMF"
    nmf = nimfa.Nmf(sparse.csr_matrix(np.transpose(H)),seed = "random_vcol",rank = k,max_iter = 10)
    nmf_fit = nmf()
    H1 = nmf_fit.basis()
    H2 = nmf_fit.coef()
    print "NMF ok"
    # reconstruct per-word vectors in the reduced space
    construct = W.dot(H1.toarray())
    wordVectors = {}
    myESA = ESA.ESA()
    wordsim = myESA.getWordSim()
    for i in range(len(construct)):
        if self.wordList[i] in wordsim:
            wordVectors[self.wordList[i]] = construct[i]
    cPickle.dump([columnNum,wordVectors],open("data/mc_matrix",'wb'))
def run_one(V, rank):
    """
    Run standard NMF on medulloblastoma data set. 50 runs of Standard NMF are
    performed and obtained consensus matrix averages all 50 connectivity
    matrices.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.ndarray`
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    n_samples = V.shape[1]
    consensus = np.zeros((n_samples, n_samples))
    for _ in range(50):
        model = nimfa.Nmf(V, rank=rank, seed="random_vcol", max_iter=200,
                          update='euclidean', objective='conn', conn_change=40)
        fit = model()
        print("Algorithm: %s\nInitialization: %s\nRank: %d" % (model, model.seed, model.rank))
        consensus += fit.fit.connectivity()
    consensus /= 50.
    plot(reorder(consensus), rank)
def create_nmf_summary(self, data, ranks, n_runs):
    """
    Estimate NMF quality measures over a range of ranks.

    Caches the nimfa rank-estimation summary on self.summary and returns it.

    :param data: target matrix to factorize.
    :param ranks: iterable of candidate ranks.
    :param n_runs: number of runs per rank.
    :raises ValueError: if ranks or n_runs is None.
    """
    if ranks is None or n_runs is None:
        raise ValueError(
            "Either ranks or n_runs is empty. Recreate the NMFCC class with these parameters inputted."
        )
    model = nimfa.Nmf(data, seed="random_vcol")
    self.summary = model.estimate_rank(rank_range=ranks, n_run=n_runs, what='all')
    return self.summary
def _nmf(E, k):
    """
    Rank-k NMF of table E; if E has negative entries the whole matrix is
    shifted up by its global minimum first so the input is nonnegative.

    :param E: input table.  # assumes a pandas DataFrame — TODO confirm
    :param k: factorization rank.
    :return: transposed mixture matrix H^T as an ndarray (samples x k).
    """
    lowest = E.min().min()
    V = E.values - lowest if lowest < 0 else E.values
    model = nimfa.Nmf(V, rank=int(k), seed="random_vcol", max_iter=20000,
                      update='euclidean')
    fit = model()
    return fit.fit.H.A.T
def NMFfeatures_helper(h):
    """
    Rank-4 NMF of the 77x77 hourly CA taxi-flow matrix for hour h.

    :return: 77x8 matrix [basis | coef^T], i.e. source features next to
        destination features per region.
    """
    flow = np.loadtxt("../miscs/{0}/taxi-CA-h{1}.matrix".format(year, h), delimiter=" ")
    n_rows, n_cols = flow.shape
    assert n_rows == n_cols and n_rows == 77
    model = nimfa.Nmf(flow, rank=4, max_iter=100)
    fit = model()
    features = np.concatenate((fit.basis(), fit.coef().T), axis=1)
    assert features.shape == (77, 8)
    return features
def NMFfeatures():
    """
    Rank-4 divergence NMF of the taxi-flow matrix; returns per-region
    [basis | coef^T] features concatenated column-wise.
    """
    flow = np.loadtxt("../miscs/taxiFlow.csv", delimiter=",")
    model = nimfa.Nmf(flow, rank=4, max_iter=30, update="divergence",
                      objective="conn", conn_change=50)
    fit = model()
    return np.concatenate((fit.basis(), fit.coef().T), axis=1)
def nmf_library(V, W_init, correct_H):
    """
    Compare several nimfa factorizers against a known mixture vector.

    Each factorizer is initialized with the same W and a zero H, run for 100
    iterations at rank 3, and scored by mean absolute error between its
    normalized H and correct_H.

    :return: [lsnmf_error, nmf_error, icm_error, bd_error, pmf_error]
    """
    factorizers = [nimfa.Lsnmf, nimfa.Nmf, nimfa.Icm, nimfa.Bd, nimfa.Pmf]
    # (nimfa.Lfnmf was excluded in the original comparison as well)
    errors = []
    for make_model in factorizers:
        model = make_model(V, seed=None, rank=3, max_iter=100,
                           H=np.array([0., 0., 0.]).reshape(-1, 1), W=W_init)
        model()  # run the factorization; the model retains its H afterwards
        errors.append(mean_absolute_error(
            correct_H, normalized(np.array(model.H).reshape(-1, ))))
    return errors
def do_nmf(V):
    """
    Factorize V with rank-10 NMF, print the factors, then print a rank
    estimate over several candidate ranks.
    """
    model = nimfa.Nmf(V, seed='random_vcol', rank=10, max_iter=100)
    fit = model()
    print('Basis matrix:\n%s' % fit.basis())
    print('Mixture matrix:\n%s' % fit.coef())
    print("starting")
    # 'what=all' collects every quality measure per candidate rank
    estimate = model.estimate_rank(rank_range=[10, 15, 20, 25], what='all')
    print('Rank estimate:\n%s' % estimate)
def factorization(self, cv_results_file):
    """
    Matrix factorization, saves predictions to self.predictions and mask to self.mask
    :param cv_results_file: file for saving cv scores
        NOTE(review): only used by the commented-out cross-validation call below.
    """
    print('\nDfmf')
    selected_features = self.selected_features
    # hold out 20% of user ratings for evaluation
    mask = self.split_train_test(self.users_ratings, 0.2)
    R12 = self.users_ratings
    R23 = selected_features
    R14 = self.users
    # Parameters choice
    print('\nParameters\n')
    #parameters = [2, 4, 6, 8, 10]
    parameters = [2, 4, 6, 8, 10, 12]
    k = 3
    #best_p_t1 = self.cross_validation(k, parameters, mask, R12, cv_results_file)
    #print(str(best_p_t1) + '\n')
    # rank fixed to the value previously found best by cross-validation
    best_p_t1 = 10
    #best_p_t2 = 12
    #best_p_t3 = 2
    #best_p_t4 = 2
    V = spr.csr_matrix(R12)
    #V.todense()
    nmf = nimfa.Nmf(V, max_iter=200, rank=best_p_t1, update='euclidean', objective='fro')
    nmf_fit = nmf()
    W = nmf_fit.basis()
    H = nmf_fit.coef()
    sm = nmf_fit.summary()
    #R12_pred = np.dot(W.todense(), H.todense())
    # reconstruct the ratings matrix from the factors
    R12_pred = np.dot(W, H)
    self.predictions = R12_pred
    self.mask = mask
    self.true_values = R12
def calculate_nmf_error(self, mixture, n_bases, dist_type, iterations, attempts, seed):
    """
    Cross-check nussl's NMF against nimfa on the same mixture.

    For each attempt, runs nussl's TransformerNMF, then runs nimfa initialized
    with the SAME template/activation matrices, reconstructs both estimates,
    and raises if nussl's worst-case relative error is both larger than
    nimfa's and larger than self.max_error_pct by more than max_error_pct.

    :param mixture: nonnegative target matrix to factorize.
    :param n_bases: factorization rank.
    :param dist_type: nussl distance measure; KL divergence is mapped to
        nimfa's 'divergence' update string.
    :param iterations: max iterations for both implementations.
    :param attempts: number of independent comparison rounds.
    :param seed: seed passed to nussl's NMF.
    :raises Exception: when nussl's error exceeds nimfa's beyond tolerance.
    """
    div = nussl.transformers.TransformerNMF.KL_DIVERGENCE
    # nussl and nimfa name the KL-divergence objective differently
    nimfa_type = 'divergence' if dist_type == div else dist_type
    for i in range(attempts):
        # Set up nussl NMF
        nussl_nmf = nussl.TransformerNMF(mixture, n_bases,
                                         max_num_iterations=iterations,
                                         distance_measure=dist_type, seed=seed)
        # Run nussl NMF
        nussl_nmf.transform()
        # Set up nimfa NMF
        nimfa_nmf = nimfa.Nmf(mixture, max_iter=iterations, rank=n_bases,
                              update=nimfa_type,
                              W=nussl_nmf.template_dictionary,
                              H=nussl_nmf.activation_matrix
                              )  # init to same matrices as nussl
        # Run nimfa NMF
        nmf_fit = nimfa_nmf()
        # Dot the results
        nimfa_est = np.dot(nmf_fit.basis(), nmf_fit.coef())
        nussl_est = np.dot(nussl_nmf.template_dictionary, nussl_nmf.activation_matrix)
        # calculate errors (element-wise relative error, worst case)
        max_nussl_error = np.max(np.abs(nussl_est - mixture) / mixture)
        max_nimfa_error = np.max(np.abs(nimfa_est - mixture) / mixture)
        max_diff = max_nussl_error - max_nimfa_error
        # IF nussl's max error is bigger than nimfa's
        # AND nussl's max error bigger than the specified max error (0.05, or 5%)
        # AND the difference between the max errors is larger than 0.05
        # THEN we throw an exception
        # i.e., make sure nussl's results are close to nimfa's
        if max_nussl_error > max_nimfa_error \
                and max_nussl_error > self.max_error_pct \
                and max_diff > self.max_error_pct:
            raise Exception(
                'max nussl error is larger than nimfa and self.max_error_pct'
            )
def _nmf(E, k):
    """
    Rank-k NMF of DataFrame E; if E has negative entries the whole matrix is
    shifted up by its global minimum so the factorization input is
    nonnegative.  Prints the fitted model's RSS.

    :param E: input table.  # assumes a pandas DataFrame — TODO confirm
    :param k: factorization rank.
    :return: transposed mixture matrix H^T as an ndarray (samples x k).
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # long-standing equivalent (and matches the sibling _nmf implementation).
    if E.min().min() < 0:
        V = E.values - E.min().min()
    else:
        V = E.values
    nmf = nimfa.Nmf(V, rank=int(k), seed="random_vcol", max_iter=20000,
                    update='euclidean')
    fit = nmf()
    print(nmf.rss())
    return fit.fit.H.A.T
def NMFmod(self, rank):
    """
    Build (without running) a divergence-objective NMF model of the given
    rank, with W and H initialized reproducibly from self.seed.

    :param rank: factorization rank.
    :return: an unfitted nimfa.Nmf model over self.M_run.
    """
    rng = np.random.RandomState(self.seed)
    # draw W before H so the random stream matches previous runs
    w0 = rng.rand(self.M_run.shape[0], rank)
    h0 = rng.rand(rank, self.M_run.shape[1])
    return nimfa.Nmf(self.M_run,
                     rank=rank,
                     H=h0,
                     W=w0,
                     update="divergence",
                     objective='div',
                     n_run=1,
                     max_iter=200)
def GetLatentSpace(train, rank=10):
    """
    Factorize the sparse interaction matrix and return row-normalized latent
    factors: one matrix for the columns of `train` (from the coefficient
    factor, transposed) and one for its rows (from the basis factor).

    :param train: sparse matrix (must support .todense()).
    :param rank: factorization rank.
    :return: (column_factors_normalized, row_factors_normalized)
    """
    model = nimfa.Nmf(train.todense(), seed='random_vcol', rank=rank, max_iter=100)
    fit = model()
    # GetLatentSpace CustomerNameIdxs: coefficient factor, one row per column
    col_factors = np.array(fit.coef()).T
    col_factors_unit = col_factors / np.linalg.norm(col_factors, axis=1)[:, np.newaxis]
    # GetLatentSpace tickers: basis factor, one row per row of `train`
    row_factors = np.array(fit.basis())
    row_factors_unit = row_factors / np.linalg.norm(row_factors, axis=1)[:, np.newaxis]
    return col_factors_unit, row_factors_unit
def NMFfeatures(h):
    """
    Rank-10 divergence NMF of the hourly taxi-flow matrix restricted to the
    regions actually present for hour h (Python 2 code).

    :param h: hour index used to pick the input files.
    :return: (features, tid) where features is [basis | coef^T] per region
        and tid is the sorted integer region ids present at this hour.
    """
    t = np.genfromtxt("../miscs/{0}/taxi-h{1}.vec".format(year, h), delimiter=" ", skip_header=1)
    tid = t[:,0]
    l = len(tid)
    tid = tid.astype(int)
    tid.sort()
    # positions of this hour's region ids within the module-level sortedId
    idx = np.searchsorted(sortedId, tid)
    print "@hour {0}, #regions {1}".format(h, len(idx))
    f = np.loadtxt("../miscs/{0}/taxi-h{1}.matrix".format(year, h), delimiter=",")
    # keep only rows/columns for the present regions
    fp = f[idx,:]
    fp = fp[:, idx]
    assert fp.shape==(l, l)
    nmf = nimfa.Nmf(fp, rank=10, max_iter=30, update="divergence", objective="conn", conn_change=50)
    nmf_fit = nmf()
    src = nmf_fit.basis()
    dst = nmf_fit.coef()
    return np.concatenate( (src, dst.T), axis=1 ), tid
def NMF(V, rank):
    """
    Run divergence-objective NMF for a target matrix.

    :param V: the target matrix to decompose.
    :param rank: factorization rank.
    :return: dict with basis W, mixture H, actual rank K, and the KL distance.
    """
    model = nimfa.Nmf(V, seed="random_c", rank=rank, n_run=1, max_iter=2000,
                      update='divergence', objective='div')
    fit = model()
    basis = fit.basis()
    print('Stop at iteration #: %d' % fit.summary()["n_iter"])
    return {
        "W": basis,
        "H": fit.coef(),
        "K": basis.shape[1],
        "KLD": fit.distance(metric='kl')
    }
def run(self, output_file):
    """
    Run the configured matrix-factorization method and rewrite output_file
    in place (Python 2 code).

    Dispatches on self.method to a nimfa factorizer, stores the fitted
    (reconstructed) matrix in self.result, then replaces each "i j ..." line
    of output_file with "i j value" using the reconstructed entries.

    :param output_file: existing file of whitespace-separated index pairs;
        rewritten in place.
    """
    print "Running non-negative MF....", strftime(
        "%Y-%m-%d %H:%M:%S", gmtime())
    # pick the factorizer named by self.method
    if self.method == 'nmf':
        modelnmf = nimfa.Nmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "lfnmf":
        modelnmf = nimfa.Lfnmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "nsnmf":
        modelnmf = nimfa.Nsnmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "pmf":
        modelnmf = nimfa.Pmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "psmf":
        modelnmf = nimfa.Psmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "snmf":
        modelnmf = nimfa.Snmf(self.mat, rank=self.rank, max_iter=self.iter)
    elif self.method == "sepnmf":
        modelnmf = nimfa.Sepnmf(self.mat, rank=self.rank, max_iter=self.iter)
    else:
        print "No model is being recognized, stopped."
        sys.exit(1)
    model = modelnmf()
    # reconstructed matrix W*H
    self.result = np.array(model.fitted())
    print "Done MF!", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    print "Write results to file.", strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # read existing pairs, truncate, and write back with predicted values
    with open(output_file, "r+") as file:
        query = file.readlines()
        file.seek(0)
        file.truncate()
        for line in query:
            list = line.split()
            newline = "%s %s %f\n" % (
                list[0], list[1], self.result[int(list[0])][int(list[1])]
            )
            file.write(newline)
def produce(self, inputs): warnings.filterwarnings( "ignore") # for removing warnings thrown by nimfa # for testing # a = numpy.array([[1,0,1,0,1],[1,0,1,0,1],[1,0,1,0,1]]) # b = numpy.array([[1,0],[1,0],[1,0],[1,0],[1,0]]) # print(type(a)) # print(type(self._W[0])) nmf = nimfa.Nmf(V=numpy.array(inputs.values), seed=self._seed, W=self._W[0], H=self._H[0], rank=self._rank, update=self._update, objective=self._objective, min_residuals=self._learning_rate) nmf_fit = nmf() W = nmf_fit.basis() H = nmf_fit.coef() column_names = [ 'row_latent_vector_' + str(i) for i in range(self._rank) ] W = pd.DataFrame(data=W, columns=column_names) # print(type(W)) #TODO: Column latent vector column_names = [ 'column_latent_vector_' + str(i) for i in range(inputs.shape[1]) ] H = pd.DataFrame(data=H, columns=column_names) W.reset_index(drop=True, inplace=True) H.reset_index(drop=True, inplace=True) result = pd.concat([W, H], axis=1) # print(result.head(10)) return result
def getNmf(cls, path="../../data/intern_samplelog.csv"):
    """
    Load the sample ad log, strip string prefixes from id columns, drop
    unused columns, run rank-20 NMF and print fit diagnostics.

    :param path: CSV file with the raw log.
    :return: the fitted nimfa run (mf_fit).
    """
    #data = pandas.read_table(path,header=0)
    data = pandas.read_csv(path)
    # "media_X" -> "X" so the column can be treated numerically
    data['site'] = data['site'].str.replace(
        "media_", ""
    )  #http://stackoverflow.com/questions/24037507/converting-string-objects-to-int-float-using-pandas
    #data["floor_price"]=data["floor_price"].str.replace("NA","0")
    data["user"] = data["user"].str.replace("user_", "")
    # columns not used by the factorization
    del data["click"]
    del data["advertiser"]
    del data["os"]
    del data["floor_price"]
    #data["os"]=data["os"].str.replace("iOS","1")
    #data["os"]=data["os"].str.replace("Android","2")
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is equivalent
    vec = np.matrix(data.values)
    nmf = nimfa.Nmf(vec, seed='random_vcol', rank=20, max_iter=50)
    nmf_fit = nmf()
    print('Rss: %5.4f' % nmf_fit.fit.rss())
    print('Evar: %5.4f' % nmf_fit.fit.evar())
    print('K-L divergence: %5.4f' % nmf_fit.distance(metric='kl'))
    print('Sparseness, W: %5.4f, H: %5.4f' % nmf_fit.fit.sparseness())
    return nmf_fit
def generateSpectraPlot(spectra_data): bin_lower_bounds = [] # Loops through each column header in the .csv file to get the lower bound for plotting for column in spectra_data.columns: bound = re.findall(r"[-+]?\d*\.\d+|\d+", column) # Parses the float bound from the column header bin_lower_bounds.append(float(bound[0])) ax = graphSetup("MassSpectra NMF Basis Vector Plot", "Bin Lower Bounds [m/z]", r"$Intensity\,[\%]$", [np.min(bin_lower_bounds), np.max(bin_lower_bounds)], [0,100]) # Convert to np array and transpose it so that the bin numbers are the rows and it's vectors of spectra intensity data = np.transpose(spectra_data.values) nmf_model = nimfa.Nmf(data) basis = nmf_model().basis() intensities = [] for vector in basis: print(np.linalg.norm(vector)) intensities.append(np.linalg.norm(vector)) # Adds the magnitude of the intensity vector to the array for graphing intensities = intensities/np.max(intensities) * 100 spectra_plt = ax.bar(bin_lower_bounds, intensities) savePlot()