Ejemplo n.º 1
0
def nnmf_analysis():
    if len(sys.argv) == 1:
        target_year = 1997
    else:
        target_year = int(sys.argv[1])
    fname = str(target_year) + '-' + str(target_year+1) + '.xls'
    
    X, flabels, PLY, ptr1, ptr2, ptr3 = parse_stats(fname)
    # pos = {C, F, G}
    ptr1 = np.array(X[:,0]==1).transpose()
    ptr2 = np.array(X[:,0]==2).transpose()
    ptr3 = np.array(X[:,0]==3).transpose()
    X = normalize(X)
    
    # NNMF
    w, h = nnmf.factorize(X[:,1:], pc=3, iter=100)
    print np.shape(h)
    print np.shape(w)
    print
    print h[:,0]
    print
    print h[:,1]
    print
    print h[:,2]
    print
    print h[:,3]
    
    return
Ejemplo n.º 2
0
def recommend(article, n, nofeatures, max_iter=50):
	#Factorize the matrix, make features
	word_dictionary, article_words, paper_titles = makeFeatures()
	wordmatrix,wordvec = makematrix(word_dictionary, article_words)

	plt.plot(sorted(word_dictionary.values()))
	plt.savefig("WordFrequencies.png")

	w,h = nnmf.factorize(np.matrix(wordmatrix), nofeatures, max_iter)

	print "Recommendations for article: "
	print article
	print "-----------------------------"
	tofind = unicode(paper_titles.index(article))
	nearest = nn.findnearest(w[tofind,:], w, paper_titles, 1, 0)
	nn.printnearest(nearest, 5)
Ejemplo n.º 3
0
def visualizeFactorization(nofeatures=5, max_iter=10):
	word_dictionary, article_words, paper_titles = makeFeatures()
	wordmatrix,wordvec = makematrix(word_dictionary, article_words)
	w,h = nnmf.factorize(np.matrix(wordmatrix), nofeatures, max_iter)


	toppatterns=[[] for i in range(len(paper_titles))]
	patternnames=[]

	pc, wc = np.shape(h)
	for i in range(pc):
		slist=[]
		# Create a list of words and their weights
		for j in range(wc):
			slist.append((h[i,j],wordvec[j]))
		# Reverse sort the word list
		slist.sort()
		slist.reverse()
		# Print the first six elements
		n=[s[1] for s in slist[0:10]]
		
		print "----------------------------------------"
		print "Features/Topic Found"
		print str(i) + str(n)

		print "Top 3 Articles Closest Matching Feature/Topic"
		flist=[]
		for j in range(len(paper_titles)):
			# Add the article with its weight
			flist.append((w[j,i],paper_titles[j]))
			toppatterns[j].append((w[j,i],i,paper_titles[j]))
			# Reverse sort the list
		flist.sort()
		flist.reverse()
		# Show the top 3 art
		for f in flist[0:5]:
			print f
Ejemplo n.º 4
0
  # NOTE(review): this is the tail of a per-ticker download loop whose header
  # is outside this excerpt; `t` appears to be the current ticker and `rows`
  # its CSV lines -- confirm against the full script.
  print rows
  print

  
  # Extract the volume field from every line
  # (column index 5 of each comma-split row; despite the dict being named
  # `prices`, the original comment calls this the volume column -- verify
  # against the CSV layout of the data source)
  prices[t]=[float(r.split(',')[5]) for r in rows[1:] if r.strip()!='']
  # Track the shortest series so every ticker can later be cut to equal length.
  if len(prices[t])<shortest: shortest=len(prices[t])
  
  # Capture the date column once, from the first ticker processed.
  if not dates:
    dates=[r.split(',')[0] for r in rows[1:] if r.strip()!='']

# Build the data matrix: one row per observation (up to `shortest`),
# one column per ticker.
l1=[[prices[tickers[i]][j] 
     for i in range(len(tickers))] 
    for j in range(shortest)]

# Non-negative matrix factorization into 5 components: l1 ~ w * h.
w,h=nnmf.factorize(matrix(l1),pc=5)

print "Printing h"
print

print h

print "Printing w"
print w

print "Exiting"
exit()

# NOTE(review): everything below is unreachable -- exit() above terminates the
# script before this loop runs. Remove exit() or this loop.
# Loop over all the features
for i in range(shape(h)[0]):
  print "Feature %d" %i
Ejemplo n.º 5
0
def showarticles(titles, toppatterns, patternnames, out='articles.txt'):
    """Write each article title followed by its top three patterns to *out*.

    For every article j, toppatterns[j] (a list of
    (weight, pattern_index, title) tuples) is sorted in place into descending
    order, and the three heaviest entries are written as
    '<weight> <pattern name>' lines, UTF-8 encoded, with a blank line after
    each article.

    Fixes: file() (removed in Python 3; open() is the documented spelling)
    replaced with open() inside a `with` block so the handle is closed even
    if a write raises; the file is opened in binary mode and every piece is
    explicitly UTF-8 encoded, so non-ASCII pattern names no longer crash
    str() and the output is identical on Python 2 and 3.
    """
    with open(out, 'wb') as outfile:
        # Loop over all the articles
        for j in range(len(titles)):
            outfile.write(titles[j].encode('utf8') + b'\n')

            # Get the top features for this article and reverse sort them.
            # (sort + reverse is kept, rather than sort(reverse=True), to
            # preserve the original ordering of equal-weight ties.)
            toppatterns[j].sort()
            toppatterns[j].reverse()

            # Write the top three patterns: '<weight> <pattern name>'.
            for i in range(3):
                weight = toppatterns[j][i][0]
                pidx = toppatterns[j][i][1]
                line = '%s %s\n' % (weight, patternnames[pidx])
                outfile.write(line.encode('utf8'))
            outfile.write(b'\n')


#allwords,articlewords,articletitles = getarticlewords()
#makematrix(allw=allwords,articlew=articletitles)

# Smoke test: multiply a 2x3 matrix by a 3x2 matrix and factorize the
# resulting 2x2 product into 3 components.
# NOTE(review): pc=3 requests more components than the 2x2 product has
# rows -- confirm nnmf.factorize tolerates pc > min(shape).
m1 = matrix([[1, 2, 3], [4, 5, 6]])
m2 = matrix([[1, 2], [3, 4], [5, 6]])
print m1 * m2
w, h = nnmf.factorize(m1 * m2, pc=3, iter=100)