def create_matrix(self):
    """Build the vote-share matrix for the currently selected districts.

    One row is assembled per party in ``self.data_center.list_of_parties``
    and one column per selected district.  A cell holds the party's result
    in that district as a float, or ``0.0`` when the result is below the
    threshold chosen in the combobox or when the lookup raises ``KeyError``.

    Side effect: ``self.selected_districts`` is overwritten with the listbox
    selection, or with every known district when nothing is selected.

    Returns:
        The assembled matrix after it has been passed through
        ``clusters.rotatematrix`` (presumably a transpose, which would make
        the rows districts -- confirm against ``clusters``).
    """
    # The combobox text ends with one non-numeric character (presumably a
    # "%" sign); drop it before converting.
    threshold = float(self.combobox.get()[:-1])

    # Districts highlighted in the listbox; an empty selection means "all".
    self.selected_districts = [
        self.lb.get(index) for index in self.lb.curselection()
    ]
    if not self.selected_districts:
        self.selected_districts = self.data_center.list_of_districts

    rows = []
    for party_acr in self.data_center.list_of_parties:
        row = []
        for district in self.selected_districts:
            try:
                share = float(
                    self.data_center.parties_dict[party_acr]
                    .election_results[district])
            except KeyError:
                # No figure available for this party/district pair.
                share = 0.0
            # Results under the threshold are blanked out as well.
            row.append(share if share >= threshold else 0.0)
        rows.append(row)

    return clusters.rotatematrix(rows)
def cluster_district(self):
    """Cluster the districts and display the resulting dendrogram.

    Reads ``matrix.txt`` through ``clusters.readfile``, stores the three
    returned values on the instance, clusters the rotated data with
    ``clusters.sim_distance`` and shows the rendered ``districts.jpg`` on
    the canvas.  Returns nothing.
    """
    image_name = "districts.jpg"

    # Remember that the user asked for the district view.
    self.state = "district"
    self.analysis_frame.pack(side=TOP, fill=BOTH)

    # Wipe the previous drawing, see
    # https://stackoverflow.com/questions/15839491/how-to-clear-tkinter-canvas
    self.canvas.delete("all")

    parsed = clusters.readfile("matrix.txt")
    self.party_list, self.district_list, self.data = parsed

    # The data is rotated first so that each row describes a district.
    district_rows = clusters.rotatematrix(self.data)
    tree = clusters.hcluster(district_rows, distance=clusters.sim_distance)
    clusters.drawdendrogram(tree, self.district_list, jpeg=image_name)

    # Put the freshly written picture on the canvas.
    self.insert_image(image_name)
def get_clusture(self, param):
    """Cluster the data file named by ``self.writed_names`` and show it.

    Args:
        param: ``"Country"`` to cluster the rows as read (labelled with the
            first value returned by ``clusters.readfile``), or
            ``"Criterias"`` to cluster the rotated matrix (labelled with
            the second returned value).

    Raises:
        ValueError: if ``param`` is neither of the two supported values.
            Previously such a call read the file and then failed with an
            ``UnboundLocalError`` because no cluster had been built.

    Side effects: sets ``self.jpg_names`` to ``'clustured2.jpg'``, writes
    the dendrogram to that file and calls ``self.show_image()``.
    """
    # Reject unsupported modes up front, before any file is touched.
    if param not in ("Country", "Criterias"):
        raise ValueError(
            "param must be 'Country' or 'Criterias', got %r" % (param,))

    country_names, records, records_data = clusters.readfile(
        self.writed_names)

    if param == "Country":
        clust = clusters.hcluster(records_data)
        label = country_names
    else:  # "Criterias"
        rotated = clusters.rotatematrix(records_data)
        clust = clusters.hcluster(rotated)
        label = records

    self.jpg_names = 'clustured2.jpg'
    clusters.drawdendrogram(clust, labels=label, jpeg=self.jpg_names)
    self.show_image()
def cluster_poli(self, event):
    """Cluster by political party and draw the dendrogram on the canvas.

    ``event`` is accepted but not used by the body.  Returns nothing.
    """
    # On the very first clustering the second half of the GUI has to be
    # built before anything can be shown.
    first_run = self.run == 0
    if first_run:
        self.create_rest_of_gui()
        self.run += 1
    self.update_idletasks()

    # Remembered for the refined analysis.
    self.var.set("party")

    # create_matrix() output is rotated once more before clustering; the
    # labels passed to the drawing step are the party names.
    party_rows = clusters.rotatematrix(self.create_matrix())
    tree = clusters.hcluster(party_rows, distance=sim_distance)
    # No output name is passed; the picture is read back from
    # "clusters.jpg" below (presumably the drawdendrogram default -- confirm).
    clusters.drawdendrogram(tree, self.data_center.list_of_parties)

    # Rebuild the second GUI part so that everything is reset.
    self.create_rest_of_gui()

    dendrogram = Image.open("clusters.jpg")
    self.img = ImageTk.PhotoImage(dendrogram)
    self.canvas.create_image(0, 0, anchor=NW, image=self.img)
# Calling generatefeedvector (book page 54) to produce the blogdata file
# failed.  Is that because the URLs in feedlist can no longer be opened?
# downloadzebodata also failed to generate zebo.txt.  sigh
import clusters

blognames, words, data = clusters.readfile('blogdatadown.txt')  # 1
# clust = clusters.hcluster(data)
# print (clust)  # sure enough, the value printed inside the function differs every time too.
# print(blognames)
# clusters.printclust(clust, labels = blognames)  # 2
# clusters.drawdendrogram(clust, blognames, jpeg = 'blogclust.jpg')  # 3

# Cluster the columns (words) instead of the rows (blogs).
rdata = clusters.rotatematrix(data)  # 4
wordclust = clusters.hcluster(rdata)
clusters.drawdendrogram(wordclust, labels=words, jpeg='wordclust.jpg')

# NOTE(review): the experiments below were disabled with a ''' string that is
# never closed in the visible source, which stops the script from parsing.
# They are kept as ordinary comments instead; confirm nothing followed them.
#
# kclust = clusters.kcluster(data, k = 4)  # 5
# print ([blognames[r] for r in kclust[0]])
# print ([blognames[r] for r in kclust[1]])
#
# import urllib.request  # 6
# from bs4 import BeautifulSoup
# c = urllib.request.urlopen('https://en.wikipedia.org/wiki/Jon_Snow')
# soup = BeautifulSoup(c.read(),"lxml")  # very interesting! worth reading this library's source some time.
# links = soup('a')  # so I still do not understand how Beautiful Soup is used.
# print(links[10])
# print(links[10]['href'])
# The section above is the Beautiful Soup tutorial part.
import clusters

# Load the title vectors and rotate them so that each row describes a word.
docs, words, data = clusters.readfile('titles_vectors.txt')
rdata = clusters.rotatematrix(data)

# (distance function name in ``clusters``, banner to print, output image)
runs = (
    ('pearson', 'clusters by pearson correlation',
     'wordsclustpearson.jpg'),
    ('tanimoto', 'clusters by tanimoto coefficient',
     'wordsclusttanimoto.jpg'),
    ('euclidean', 'clusters by euclidean distance',
     'wordsclusteuclidean.jpg'),
)

# Same clustering, once per distance measure: cluster, announce, print the
# tree as text, then render it to its own image file.
for metric_name, banner, image_name in runs:
    # Looked up lazily so each measure is resolved right before it is used.
    clust = clusters.hcluster(rdata, distance=getattr(clusters, metric_name))
    print(banner)
    clusters.printhclust(clust, labels=words)
    clusters.drawdendrogram(clust, words, jpeg=image_name)
def ColumnClustering():
    """Cluster the columns of ``blogdata1.txt`` and draw ``wordclust.jpg``.

    The ``clusters`` module is reloaded first, then the data file is read,
    rotated, hierarchically clustered and rendered with the column labels.
    Returns nothing.
    """
    # NOTE(review): bare ``reload`` is the Python 2 builtin.
    reload(clusters)

    # The first value returned by readfile (row labels) is not needed here.
    _, words, data = clusters.readfile('blogdata1.txt')

    columns_as_rows = clusters.rotatematrix(data)
    tree = clusters.hcluster(columns_as_rows)
    clusters.drawdendrogram(tree, labels=words, jpeg='wordclust.jpg')
import itertools

import clusters

# Print the distance and similarity for every unordered pair of word vectors.
docs, words, data = clusters.readfile('titles_vectors.txt')
distance_func = clusters.pearson

print()
print("Pearson coefficient between words")

# Rotate the data so that row k is the vector for words[k]
# (assumes readfile's second value labels the data columns -- confirm).
wordvectors = clusters.rotatematrix(data)

# Visit each pair (i < j) exactly once.
for (i, first), (j, second) in itertools.combinations(
        enumerate(wordvectors), 2):
    dist = distance_func(first, second)
    sim = 1.0 - dist
    # The labels must use the same indices as the vectors.  The previous
    # ``words[i - 1]`` / ``words[j - 1]`` named the wrong words and wrapped
    # around to the last word for i == 0.
    print(
        'distance between words <' + words[i] + '> and <' + words[j] + '>=',
        dist, ', and similarity =', sim)
def getKClusterRotated(inputFile, k):
    """Run k-means clustering on the rotated matrix read from ``inputFile``.

    Args:
        inputFile: path handed to ``clusters.readfile``.
        k: number of clusters requested from ``clusters.kcluster``.

    Returns:
        A 3-tuple: the first two values returned by ``clusters.readfile``
        unchanged, followed by the result of ``getNumbersToString`` applied
        to the second of those values and the k-means output.
    """
    row_names, col_names, matrix = clusters.readfile(inputFile)

    # Rotate before clustering so the columns of the file get grouped.
    rotated = clusters.rotatematrix(matrix)
    groups = clusters.kcluster(rotated, k=k)

    labelled_groups = getNumbersToString(col_names, groups)
    return row_names, col_names, labelled_groups
def main(args):
    """Run the ``sctoolbox`` sub-command selected by ``args``.

    ``args`` is indexed like ``sys.argv`` (``args[0]`` is the program name).
    A sub-command is chosen only when both the number of arguments and the
    keywords match exactly; any other input prints the usage text to stderr.
    ``searchTrack``, ``getTrackScore`` and ``similar`` are listed in the
    usage text but have no branch here, so they also end up at the usage
    text.

    An SCDB client is registered before dispatching, even when only the
    usage text is printed.  Non-numeric ``[n]`` / ``[playlimit]`` values
    raise ``ValueError`` from ``int()``.

    All output uses single-argument ``print(...)`` calls and
    ``sys.stderr.write`` so the function behaves the same on Python 2 and 3.
    """

    def usage():
        """Write the list of supported invocations to stderr."""
        usage_lines = (
            "Usage:",
            "sctoolbox correlates common_tracks [user1] [user2]",
            "sctoolbox correlates pearson_tastes [user1] [user2]",
            "sctoolbox suggest [user] bestlikes [n]",
            "sctoolbox suggest [user] following_tournament [n]",
            "sctoolbox suggest [user] following_tournament_short [n]",
            "sctoolbox suggest [user] following_tournament [n] --nomix",
            "sctoolbox suggest [user] following_tournament_short [n] --nomix",
            "sctoolbox suggest [user] following_tournament_playlimit [n] --nomix [playlimit]",
            "sctoolbox suggest [user] following_tournament_playlimit [n] [playlimit]",
            "sctoolbox searchUser [username]",
            "sctoolbox searchTrack [trackname]",
            "sctoolbox getTrackScore [trackname]",
            "sctoolbox similar [trackname]",
            "sctoolbox draw_style_galaxy [user] [jpg_path]",
        )
        sys.stderr.write("\n".join(usage_lines) + "\n")

    def show_suggestions(profile, header, **options):
        """Turn ``profile`` into ``int(args[4])`` suggestions and print them."""
        suggestions = SCDB.getSuggestionsFromProfile(
            client, profile, int(args[4]), **options)
        print(args[2] + header)
        for item in suggestions:
            print(item)

    client = SCDB.register()

    argc = len(args)
    # Guarded look-ups so short argument lists fall through to usage().
    command = args[1] if argc > 1 else None
    mode = args[3] if argc > 3 else None
    likes_header = " should like these tracks:"

    # ---- correlates -------------------------------------------------------
    if argc == 5 and command == 'correlates' and args[2] == 'pearson_tastes':
        user1 = SCDB.searchForUser(client, args[3])
        user2 = SCDB.searchForUser(client, args[4])
        puser1 = SCDB.extractProfile(client, user1)
        puser2 = SCDB.extractProfile(client, user2)
        r = SCDB.comparePearson(puser1, puser2)
        print('Correlation score between users (pearson): %s' % (r,))

    elif argc == 5 and command == 'correlates' and args[2] == 'common_tracks':
        user1 = SCDB.searchForUser(client, args[3])
        user2 = SCDB.searchForUser(client, args[4])
        puser1 = SCDB.extractProfile(client, user1)
        puser2 = SCDB.extractProfile(client, user2)
        r = SCDB.compareCommonTracks(puser1, puser2)
        print('Correlation score between users (common tracks): %s' % (r,))

    # ---- suggest ----------------------------------------------------------
    elif argc == 5 and command == 'suggest' and mode == 'following_tournament':
        print(
            'Launching tournament between tracks from followings, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        show_suggestions(profile, likes_header)

    elif argc == 5 and command == 'suggest' and mode == 'bestlikes':
        print(
            'Rating tracks user liked, reposted, or commented, and playlisted, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.sortProfileFromFollowings(client, user)
        show_suggestions(profile, " best likes are:")

    elif (argc == 5 and command == 'suggest'
          and mode == 'following_tournament_short'):
        print('Launching short tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowingsShort(client, user)
        show_suggestions(profile, likes_header)

    elif (argc == 6 and command == 'suggest'
          and mode == 'following_tournament' and args[5] == '--nomix'):
        print(
            'Launching tournament between tracks from followings, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        show_suggestions(profile, likes_header, no_mix=True)

    elif (argc == 6 and command == 'suggest'
          and mode == 'following_tournament_short' and args[5] == '--nomix'):
        print('Launching short tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowingsShort(client, user)
        print('Generating big profile...')
        show_suggestions(profile, likes_header, no_mix=True)

    elif (argc == 7 and command == 'suggest'
          and mode == 'following_tournament_playlimit'
          and args[5] == '--nomix'):
        print('Launching custom tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        show_suggestions(profile, likes_header,
                         no_mix=True, played_limit=int(args[6]))

    # The keyword used to be compared against
    # ' following_tournament_playlimit ' (with surrounding spaces), so this
    # documented form could never be selected.
    elif (argc == 6 and command == 'suggest'
          and mode == 'following_tournament_playlimit'):
        print('Launching custom tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        show_suggestions(profile, likes_header,
                         no_mix=False, played_limit=int(args[5]))

    # ---- searchUser -------------------------------------------------------
    elif argc == 3 and command == 'searchUser':
        container = client.get('/users', q=args[2])
        for n, item in enumerate(container, 1):
            print('############################')
            print('#' + str(n))
            # NOTE(review): this line was garbled in the source
            # ("'username:'******'permalink:'"); restored as the user's
            # ``username`` attribute -- confirm against the API resource.
            print('username:' + item.username)
            print('permalink:' + item.permalink)
            print('############################')

    # ---- draw_style_galaxy ------------------------------------------------
    elif argc == 4 and command == 'draw_style_galaxy':
        print('Identifying user...')
        user = SCDB.searchForUser(client, args[2])
        print('Downloading followers list...')
        followers_list = SCDB.getFollowerList(client, user)
        followers_list = SCDB.exctractsample(followers_list)
        row, col, data = SCDB.getCommentsData(client, followers_list)
        print('Generating clusters...')
        # Rotate so that the columns of the comment data are what gets
        # clustered and labelled with ``col``.
        rotdata = clusters.rotatematrix(data)
        tagclust = clusters.hcluster(rotdata)
        print("Generationg dendrogram drawing...")
        clusters.drawdendrogram(tagclust, col, jpeg=args[3])

    else:
        usage()