def do_nmf(self, verbose=False):
    ## Filter out the low-frequency words.
    fidx = []
    fwords = []
    for i, w in enumerate(self.idx2word):
        if self.lexical_counts[w] >= self.min_count_nmf:
            fidx.append(i)
            fwords.append(w)
    fidx.append(self.alphabet_size)
    fwords.append(self.start_symbol)
    self.nmf = nmf.NMF(self.unigram_features[fidx, :], fwords, ssf=0)
    ## This may not be the start symbol of the grammar.
    start_idx = len(fidx) - 1
    self.nmf.start(start_idx)
    ## What does this even mean then?
    self.kernels = [self.start_symbol]
    assert self.start_symbol not in self.word2idx
    self.nmf.excluded.add(start_idx)
    ## Greedily grow the basis until there is one kernel per nonterminal.
    while len(self.kernels) < self.nonterminals:
        a, ai = self.nmf.find_but_dont_add()
        if verbose:
            print("Adding kernel", a, "count", self.lexical_counts[a])
        self.nmf.add_basis(ai)
        self.kernels.append(a)
    self.nmf.initialise_frank_wolfe()

def test_nmf():
    print('Testing NMF on synthetic data...')
    alpha = 1
    beta = 10
    n_components = 2
    shape = (8, 6)
    # n acts as the synthetic ground-truth model; n_est is fit to its data n.X.
    n = nmf.NMF(shape=shape, n_components=n_components, alpha=alpha, beta=beta)
    n_est = nmf.NMF(shape=n.shape, n_components=n.n_components, alpha=alpha, beta=beta)
    elbo = n_est.fit(n.X, max_iter=2000)
    # Print the ELBO trace with per-iteration deltas; for a coordinate-ascent
    # variational scheme the deltas should be non-negative.
    for i in range(len(elbo)):
        if i == 0:
            print(elbo[0])
        else:
            print('{} ; {}'.format(elbo[i], elbo[i] - elbo[i - 1]))
    print('Num. iterations: {}'.format(len(elbo) - 1))
    print('done.')

import glob
import os

import nmf


def main():
    path = r'G:\School\thesis\DS4_T\wav_renamed'
    settings = "settings.set"
    wav_files = glob.glob(path + r'\*.wav')
    for wav_path in wav_files:
        spec = readwav(wav_path)  # readwav is a project-local helper
        nmfObject = nmf.NMF(spec[2], settings)
        nmfObject.printparas()
        nmfObject.decompose()
        nmfObject.writeresults("test1")
        os.system("pause")
        #show(spec)
    return

import numpy as np
import pylab as pl

import nmf


def main():
    V = np.array([[1, 1, 1, 1, 1],
                  [0, 1, 0, 1, 0],
                  [0, 1, 0, 1, 0]])
    r = 2
    W, H = nmf.NMF(V, r, lamb=0, maxit=2000)
    Ve = np.dot(W, H)
    np.set_printoptions(precision=2, suppress=True)
    # Render each rank-1 component W @ Hi, then the inputs and the reconstruction.
    for i in range(r):
        Hi = np.zeros((r, np.shape(H)[1]))
        Hi[i, :] = H[i, :]
        TMP = np.dot(W, Hi)
        pl.imsave("V" + str(i) + ".png", TMP, cmap=pl.cm.gray)
    pl.imsave("V.png", V, cmap=pl.cm.gray)
    pl.imsave("W.png", W, cmap=pl.cm.gray)
    pl.imsave("H.png", H, cmap=pl.cm.gray)
    pl.imsave("Ve.png", Ve, cmap=pl.cm.gray)
    return 0

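## A call like nmf.NMF(V, r, lamb=0, maxit=2000) presumably wraps an iterative
## factorisation V ~ W @ H. Below is a minimal self-contained sketch of the
## classic Lee-Seung multiplicative updates; this is an assumption about what
## the custom nmf module does, and the lamb regulariser is omitted.
import numpy as np

def nmf_multiplicative(V, r, maxit=2000, eps=1e-9, seed=0):
    # Factorise a non-negative (n x m) matrix V into W (n x r) and H (r x m).
    rng = np.random.default_rng(seed)
    n, m = V.shape
    W = rng.random((n, r))
    H = rng.random((r, m))
    for _ in range(maxit):
        # Multiplicative updates for the Frobenius objective ||V - WH||^2;
        # eps guards against division by zero.
        H *= (W.T @ V) / (W.T @ W @ H + eps)
        W *= (V @ H.T) / (W @ H @ H.T + eps)
    return W, H

# Usage (hypothetical): W, H = nmf_multiplicative(np.asarray(V, dtype=float), 2)
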
def init_nmf_with_kernels(self, kernels):
    ## Given known clusters, initialise the NMF so we can do the Frank-Wolfe
    ## estimation for the unary rules.
    fidx = []
    fwords = []
    for i, w in enumerate(self.idx2word):
        fidx.append(i)
        fwords.append(w)
    fidx.append(self.alphabet_size)
    fwords.append(self.start_symbol)
    self.nmf = nmf.NMF(self.unigram_features[fidx, :], fwords, ssf=0)
    ## This may not be the start symbol of the grammar.
    start_idx = len(fidx) - 1
    self.nmf.start(start_idx)
    self.kernels = [kernels[0]]
    for a in kernels[1:]:
        ai = self.word2idx[a]
        self.nmf.add_basis(ai)
        self.kernels.append(a)
    self.nmf.initialise_frank_wolfe()

with open('mnist.txt', 'r') as mFr:
    # Parse each comma-separated line; the last field is the class label.
    for line in mFr:
        b = [float(item) for item in line.split(',')]
        categoryM.append(b[-1])
        mDataSet.append(b)

# Score each clustering against the labels of the dataset it was run on.
calculate.calculate(kMeans.kMeans(gDataSet, 2), categoryG, 2)
calculate.calculate(kMeans.kMeans(mDataSet, 10), categoryM, 10)
calculate.calculate(nmf.NMF(gDataSet, 2), categoryG, 2)
calculate.calculate(nmf.NMF(mDataSet, 10), categoryM, 10)
calculate.calculate(spectral.spectral(gDataSet, 2, 3), categoryG, 2)
calculate.calculate(spectral.spectral(gDataSet, 2, 6), categoryG, 2)
calculate.calculate(spectral.spectral(gDataSet, 2, 9), categoryG, 2)
calculate.calculate(spectral.spectral(mDataSet, 10, 3), categoryM, 10)
calculate.calculate(spectral.spectral(mDataSet, 10, 6), categoryM, 10)
calculate.calculate(spectral.spectral(mDataSet, 10, 9), categoryM, 10)

def _nmf():
    return nmf.NMF(files='texts')

#W_v = np.eye(6700)
n = len(X)
m = len(X[0])
#X = np.array(X)
W_u = np.array(W_u)
W_v = np.array(W_v)

# Build the degree matrices D_u and D_v from the affinity matrices W_u and W_v.
D_u = np.zeros(shape=(n, n))
D_v = np.zeros(shape=(m, m))
sum_W_u = np.sum(W_u, axis=1)
sum_W_v = np.sum(W_v, axis=1)
for i in range(n):
    D_u[i][i] = sum_W_u[i]
for h in range(m):
    D_v[h][h] = sum_W_v[h]

# Random non-negative initialisation of the factor matrices U, H and V.
U = np.random.rand(n, k)
H = np.random.rand(k, k)
V = np.random.rand(m, k)

U_final, H_final, V_final = nmf.NMF(X, U, H, V, D_u, W_u, D_v, W_v)
print("\nU_final:\n\n", U_final)
print("\nH_final:\n\n", H_final)
print("\nV_final:\n\n", V_final)

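## The two degree-matrix loops above can be collapsed with np.diag. A minimal
## equivalent sketch, under the same assumption that W_u and W_v are square
## non-negative affinity matrices:
D_u = np.diag(W_u.sum(axis=1))   # row sums of W_u on the diagonal
D_v = np.diag(W_v.sum(axis=1))   # row sums of W_v on the diagonal
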
import numpy as np
import pandas as pd
import sklearn.decomposition
from sklearn.feature_extraction.text import TfidfVectorizer

import nmf

df = pd.read_pickle('articles.pkl')
content = df.content.as_matrix()
n_topics = 7

# vectorize our content
vector = TfidfVectorizer(max_features=5000, stop_words='english')
vector_matrix = vector.fit_transform(content).toarray()
features = np.array(vector.get_feature_names())

our_nmf = nmf.NMF(vector_matrix, n_topics, max_iter=100)
W, H = our_nmf.fit()

### Using Your NMF Function
print "MSR: {}".format(our_nmf.msr())
for row in H:
    ind = row.argsort()[:-10 - 1:-1]   # indices of the 10 highest-weight terms
    print features[ind]

### Built-In NMF
print "\nsklearn.NMF\n"
skl_nmf = sklearn.decomposition.NMF(n_components=n_topics, random_state=1,
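
## The call above is cut off in the source. Below is a plausible completion,
## an assumption rather than the original author's code, using only the public
## sklearn.decomposition.NMF API: fit_transform() returns the document-topic
## matrix W and components_ holds the topic-term matrix H.
skl_model = sklearn.decomposition.NMF(n_components=n_topics, random_state=1)
W_skl = skl_model.fit_transform(vector_matrix)
H_skl = skl_model.components_
for row in H_skl:
    ind = row.argsort()[:-10 - 1:-1]   # same top-10 term selection as above
    print(features[ind])
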