Example #1
0
	def do_nmf(self,verbose=False):
		"""Build ``self.nmf`` over the frequent words and pick ``self.kernels``.

		Rows of ``self.unigram_features`` are kept for every word in
		``self.idx2word`` whose ``self.lexical_counts`` entry is at least
		``self.min_count_nmf``; one extra row (index ``self.alphabet_size``),
		labelled ``self.start_symbol``, is appended last.  Kernels are then
		added one at a time until there are ``self.nonterminals`` of them,
		and finally the Frank-Wolfe state of the NMF object is initialised.

		If ``verbose`` is true, each selected kernel is printed together
		with its lexical count.
		"""
		## Filter out the low frequency words, remembering index and label.
		frequent = [
			(i, w)
			for i, w in enumerate(self.idx2word)
			if self.lexical_counts[w] >= self.min_count_nmf
		]
		row_ids = [i for i, _ in frequent]
		row_labels = [w for _, w in frequent]

		## The extra final row stands for the start symbol.
		row_ids.append(self.alphabet_size)
		row_labels.append(self.start_symbol)
		self.nmf = nmf.NMF(self.unigram_features[row_ids,:], row_labels, ssf=0)
		## This may not be the start symbol of the grammar.

		start_row = len(row_ids) - 1
		self.nmf.start(start_row)
		## What does this even mean then?
		self.kernels = [self.start_symbol]
		assert self.start_symbol not in self.word2idx
		## The start row is already in use, so it is put in the excluded set.
		self.nmf.excluded.add(start_row)
		while len(self.kernels) < self.nonterminals:
			word, row = self.nmf.find_but_dont_add()
			if verbose:
				print("Adding kernel", word, "count", self.lexical_counts[word])
			self.nmf.add_basis(row)
			self.kernels.append(word)
		self.nmf.initialise_frank_wolfe()
Example #2
0
def test_nmf():
    """Fit one ``nmf.NMF`` model to the ``X`` attribute of another and print the trace.

    Two models are built with the same shape, component count, ``alpha`` and
    ``beta``.  The second is fitted to the first one's ``X``; every value
    returned by ``fit`` is printed, each one after the first followed by its
    difference from the preceding value.
    """
    print('Testing NMF on synthetic data...')
    alpha = 1
    beta = 10

    reference = nmf.NMF(shape=(8, 6), n_components=2, alpha=alpha, beta=beta)
    estimator = nmf.NMF(shape=reference.shape,
                        n_components=reference.n_components,
                        alpha=alpha,
                        beta=beta)

    elbo = estimator.fit(reference.X, max_iter=2000)

    previous = None
    for step, value in enumerate(elbo):
        if step == 0:
            # The first entry has no predecessor to compare against.
            print(value)
        else:
            print('{} ; {}'.format(value, value - previous))
        previous = value

    print('Num. iterations: {}'.format(len(elbo) - 1))
    print('done.')
Example #3
0
def main():
    """Run the NMF decomposition on every ``.wav`` file in a fixed directory.

    For each file: read it with ``readwav``, pass element 2 of the result and
    the settings file name to ``nmf.NMF``, print the parameters, decompose,
    write the results under the name "test1", then run the shell ``pause``
    command before continuing with the next file.
    """
    wav_dir = r'G:\School\thesis\DS4_T\wav_renamed'
    settings_file = "settings.set"
    pattern = wav_dir + '\\*.' + 'wav'

    for wav_path in glob.glob(pattern):
        wav_data = readwav(wav_path)
        decomposer = nmf.NMF(wav_data[2], settings_file)
        decomposer.printparas()
        decomposer.decompose()
        decomposer.writeresults("test1")
        # Wait for a key press (Windows shell command) between files.
        os.system("pause")
Example #4
0
def main():
    """Factorise a small fixed matrix and save the pieces as grayscale PNGs.

    Writes ``V<i>.png`` for the product of ``W`` with each single row of
    ``H`` (all other rows zeroed), then ``V.png``, ``W.png``, ``H.png`` and
    ``Ve.png`` (the product ``W . H``).  Returns 0.
    """
    V = np.array([[1, 1, 1, 1, 1], [0, 1, 0, 1, 0], [0, 1, 0, 1, 0]])
    rank = 2
    W, H = nmf.NMF(V, rank, lamb=0, maxit=2000)
    reconstruction = np.dot(W, H)
    np.set_printoptions(precision=2, suppress=True)

    n_columns = np.shape(H)[1]
    for component in range(rank):
        # Keep only one row of H so the product shows that component alone.
        single_row = np.zeros((rank, n_columns))
        single_row[component, :] = H[component, :]
        contribution = np.dot(W, single_row)
        pl.imsave("V" + str(component) + ".png", contribution, cmap=pl.cm.gray)

    outputs = (
        ("V.png", V),
        ("W.png", W),
        ("H.png", H),
        ("Ve.png", reconstruction),
    )
    for filename, image in outputs:
        pl.imsave(filename, image, cmap=pl.cm.gray)

    return 0
Example #5
0
	def init_nmf_with_kernels(self, kernels):
		"""Initialise ``self.nmf`` from an already known list of kernels.

		Given known clusters, this sets up the NMF so that the Frank-Wolfe
		estimation for the unary rules can be run.  Every word in
		``self.idx2word`` contributes a row of ``self.unigram_features``; one
		extra row (index ``self.alphabet_size``), labelled
		``self.start_symbol``, is appended last.

		``kernels[0]`` is stored as the first kernel without being looked up;
		each remaining kernel is mapped through ``self.word2idx`` and added
		as a basis element.
		"""
		row_labels = list(self.idx2word)
		row_ids = list(range(len(row_labels)))

		## The extra final row stands for the start symbol.
		row_ids.append(self.alphabet_size)
		row_labels.append(self.start_symbol)
		self.nmf = nmf.NMF(self.unigram_features[row_ids,:], row_labels, ssf=0)
		## This may not be the start symbol of the grammar.

		start_row = len(row_ids) - 1
		self.nmf.start(start_row)
		self.kernels = [ kernels[0] ]
		for word in kernels[1:]:
			self.nmf.add_basis(self.word2idx[word])
			self.kernels.append(word)
		self.nmf.initialise_frank_wolfe()
Example #6
0
# Read 'mnist.txt': each non-blank line is a comma-separated list of numbers.
# The last value of a row is recorded in categoryM, and the complete row
# (last value included) is appended to mDataSet.
with open('mnist.txt', 'r') as mFr:
    for line in mFr:
        # Skip blank lines (e.g. a trailing empty line); float() would
        # otherwise raise ValueError on them.
        if not line.strip():
            continue
        row = [float(item) for item in line.split(',')]
        categoryM.append(row[-1])
        mDataSet.append(row)
# No explicit close(): the with-statement has already closed the file.


# Score each clustering result against the labels that belong to the same
# data set: categoryG for gDataSet (2 clusters), categoryM for mDataSet
# (10 clusters).

calculate.calculate(kMeans.kMeans(gDataSet, 2), categoryG, 2)

calculate.calculate(kMeans.kMeans(mDataSet, 10), categoryM, 10)

calculate.calculate(nmf.NMF(gDataSet, 2), categoryG, 2)

# Fixed: this call previously compared the mDataSet clustering with
# categoryG, the labels of the other data set.
calculate.calculate(nmf.NMF(mDataSet, 10), categoryM, 10)

# Spectral clustering is run with three values (3, 6, 9) of its third
# argument on each data set.
for spectral_param in (3, 6, 9):
    calculate.calculate(spectral.spectral(gDataSet, 2, spectral_param), categoryG, 2)

for spectral_param in (3, 6, 9):
    calculate.calculate(spectral.spectral(mDataSet, 10, spectral_param), categoryM, 10)
Example #7
0
def _nmf():
    """Return the result of ``nmf.NMF`` called with ``files='texts'``."""
    model = nmf.NMF(files='texts')
    return model
Example #8
0
#W_v = np.eye(6700)

# Dimensions of X: n rows, m columns.
n = len(X)
m = len(X[0])

#X = np.array(X)
W_u = np.array(W_u)
W_v = np.array(W_v)

# Row sums of W_u and W_v; these become the diagonals of D_u and D_v.
sum_W_u = np.sum(W_u, axis=1)
sum_W_v = np.sum(W_v, axis=1)

# D_u (n x n): zero everywhere except the diagonal.
D_u = np.zeros((n, n))
for i in range(n):
    D_u[i, i] = sum_W_u[i]

# D_v (m x m): zero everywhere except the diagonal.
D_v = np.zeros((m, m))
for h in range(m):
    D_v[h, h] = sum_W_v[h]

# Random starting factors: U is n x k, H is k x k, V is m x k.
U = np.random.rand(n, k)
H = np.random.rand(k, k)
V = np.random.rand(m, k)

U_final, H_final, V_final = nmf.NMF(X, U, H, V, D_u, W_u, D_v, W_v)

for heading, factor in (
    ("\nU_final:\n\n", U_final),
    ("\nH_final:\n\n", H_final),
    ("\nV_final:\n\n", V_final),
):
    print(heading, factor)
Example #9
0
# Load the pickled articles table and pull the text column out as an array.
df = pd.read_pickle('articles.pkl')

# `.values` is used instead of `.as_matrix()`, which was deprecated and then
# removed from pandas; `.values` is available in old and new releases alike.
content = df.content.values

n_topics = 7

# vectorize our content
vector = TfidfVectorizer(max_features=5000, stop_words='english')

vector_matrix = vector.fit_transform(content).toarray()

# Newer scikit-learn releases only provide get_feature_names_out(); older
# ones only provide get_feature_names().  Use whichever is present.
if hasattr(vector, 'get_feature_names_out'):
    features = np.array(vector.get_feature_names_out())
else:
    features = np.array(vector.get_feature_names())

our_nmf = nmf.NMF(vector_matrix, n_topics, max_iter=100)

W, H = our_nmf.fit()

### Using Your NMF Function
# Single-argument print(...) produces the same output on Python 2 and 3.
print("MSR: {}".format(our_nmf.msr()))

for row in H:
    # Indices of the 10 largest entries of this row, largest first.
    ind = row.argsort()[:-10 - 1:-1]
    print(features[ind])

### Built-In NMF
print("\nsklearn.NMF\n")

skl_nmf = sklearn.decomposition.NMF(n_components=n_topics,
                                    random_state=1,