def create_matrix(self):
     """Build the vote-percentage matrix for the chosen districts.

     One row is assembled per party in ``self.data_center.list_of_parties``
     with one value per district in ``self.selected_districts``. A value
     below the threshold read from the combobox, or a missing result for
     that party/district pair, is stored as 0.0.

     Side effect: ``self.selected_districts`` is set to the listbox
     selection, or to every district when nothing is selected.

     Returns the assembled rows after passing them through
     ``clusters.rotatematrix``.
     """
     # The last character of the combobox text is dropped before parsing
     # (presumably a trailing percent sign -- confirm against the widget).
     cutoff = float(self.combobox.get()[:-1])

     self.selected_districts = [
         self.lb.get(index) for index in self.lb.curselection()
     ]
     if not self.selected_districts:
         # Nothing picked in the listbox: cluster over every district.
         self.selected_districts = self.data_center.list_of_districts

     rows = []
     for acronym in self.data_center.list_of_parties:
         row = []
         for district in self.selected_districts:
             try:
                 share = float(self.data_center.parties_dict[acronym].
                               election_results[district])
             except KeyError:
                 # No result for this party in this district.
                 share = 0.0
             # Shares under the threshold are filtered out as 0.0.
             row.append(share if share >= cutoff else 0.0)
         rows.append(row)
     return clusters.rotatematrix(rows)
 def cluster_district(self):
     """Cluster the districts and show the dendrogram on the canvas.

     Sets ``self.state`` to ``"district"``, packs the analysis frame,
     clears the canvas, reloads ``matrix.txt`` through
     ``clusters.readfile`` into ``self.party_list``,
     ``self.district_list`` and ``self.data``, clusters the rotated data
     with ``clusters.sim_distance`` and inserts the drawn
     ``districts.jpg`` into the canvas.
     """
     # Clicking "cluster districts" switches the state to district.
     self.state = "district"
     self.analysis_frame.pack(side=TOP, fill=BOTH)
     # Clear anything drawn earlier, see
     # https://stackoverflow.com/questions/15839491/how-to-clear-tkinter-canvas
     self.canvas.delete("all")

     loaded = clusters.readfile("matrix.txt")
     self.party_list, self.district_list, self.data = loaded

     # Districts are clustered on the rotated matrix.
     rotated = clusters.rotatematrix(self.data)
     tree = clusters.hcluster(rotated, distance=clusters.sim_distance)

     image_name = "districts.jpg"
     clusters.drawdendrogram(tree, self.district_list, jpeg=image_name)
     # Put the freshly drawn dendrogram on the canvas.
     self.insert_image(image_name)
예제 #3
0
 def get_clusture(self, param):
     """
     Cluster the data file named by self.writed_names and show the result.

     param - str -> selects what is clustered:
         "Country"   - cluster the data as read; the first value returned
                       by clusters.readfile (country names) labels the
                       dendrogram
         "Criterias" - cluster the rotated data; the second value returned
                       by clusters.readfile labels the dendrogram

     The dendrogram is drawn to 'clustured2.jpg' (also stored in
     self.jpg_names) and displayed through self.show_image().

     Raises ValueError when param is neither "Country" nor "Criterias".
     """
     valid_params = ("Country", "Criterias")
     if param not in valid_params:
         # Fail early with a clear message; without this check the draw
         # call below would hit unbound locals.
         raise ValueError(
             "param must be one of %r, got %r" % (valid_params, param))

     country_names, records, records_data = clusters.readfile(
         self.writed_names)
     if param == "Country":
         clust = clusters.hcluster(records_data)
         label = country_names
     else:  # "Criterias"
         rotated = clusters.rotatematrix(records_data)
         clust = clusters.hcluster(rotated)
         label = records
     self.jpg_names = 'clustured2.jpg'
     clusters.drawdendrogram(clust, labels=label, jpeg=self.jpg_names)
     self.show_image()
 def cluster_poli(self, event):
     """Cluster by political party and draw the dendrogram on the canvas.

     ``event`` is accepted but never read in this method.
     """
     # The rest of the GUI is created the first time clustering is run.
     if self.run == 0:
         self.create_rest_of_gui()
         self.run += 1
     self.update_idletasks()
     # Record the clustering mode for use in the refined analysis.
     self.var.set("party")

     # Cluster the rotated output of create_matrix with clusters.py.
     party_rows = clusters.rotatematrix(self.create_matrix())
     tree = clusters.hcluster(party_rows, distance=sim_distance)
     # No jpeg argument is given, so drawdendrogram writes to its own
     # default path (presumably the clusters.jpg opened below -- confirm).
     clusters.drawdendrogram(tree, self.data_center.list_of_parties)

     # Recreate the second GUI part so everything is reset.
     self.create_rest_of_gui()

     # Load the dendrogram image and place it at the canvas origin.
     dendrogram = Image.open("clusters.jpg")
     self.img = ImageTk.PhotoImage(dendrogram)
     self.canvas.create_image(0, 0, anchor=NW, image=self.img)
예제 #5
0
# Calling generatefeedvector (page 54) to build the blogdata file failed.
# Is it because the URLs in feedlist can no longer be opened?
# Generating zebo.txt with downloadzebodata failed as well. Sigh.
import clusters

# Step 1: load the data file; readfile's three return values are unpacked
# as blog names, words and the data matrix.
blognames,words,data = clusters.readfile('blogdatadown.txt')#1
# Disabled: hierarchical clustering of the data as read.
#clust = clusters.hcluster(data)
#print (clust)  # as expected, this value also prints differently each time
#print(blognames)

# Step 2 (disabled): print the cluster tree as text.
#clusters.printclust(clust, labels = blognames)#2

# Step 3 (disabled): draw the blog dendrogram.
#clusters.drawdendrogram(clust, blognames, jpeg = 'blogclust.jpg')#3

# Step 4: rotate the matrix, cluster the rotated data and draw the
# dendrogram labelled with `words` to wordclust.jpg.
rdata = clusters.rotatematrix(data)#4
wordclust = clusters.hcluster(rdata)
clusters.drawdendrogram(wordclust, labels = words, jpeg = 'wordclust.jpg')
'''
kclust = clusters.kcluster(data, k = 4)#5
print ([blognames[r] for r in kclust[0]])
print ([blognames[r] for r in kclust[1]])

import urllib.request#6
from bs4 import BeautifulSoup
c = urllib.request.urlopen('https://en.wikipedia.org/wiki/Jon_Snow')
soup =  BeautifulSoup(c.read(),"lxml")#这里非常有趣! 感觉有空需要看下这个源代码库呀。
links = soup('a')#所以我还是不懂beautiful soup 的用法呀。
print(links[10])
print(links[10]['href'])
#这一段是教BS的。
예제 #6
0
import clusters

# Cluster the rotated title vectors once per distance function; each run
# prints a heading and the text tree, then draws a dendrogram image.
docs, words, data = clusters.readfile('titles_vectors.txt')
rdata = clusters.rotatematrix(data)

# (distance function, heading printed before the tree, output image)
runs = (
    (clusters.pearson,
     'clusters by pearson correlation',
     'wordsclustpearson.jpg'),
    (clusters.tanimoto,
     'clusters by tanimoto coefficient',
     'wordsclusttanimoto.jpg'),
    (clusters.euclidean,
     'clusters by euclidean distance',
     'wordsclusteuclidean.jpg'),
)

for metric, heading, image_path in runs:
    clust = clusters.hcluster(rdata, distance=metric)
    print(heading)
    clusters.printhclust(clust, labels=words)
    clusters.drawdendrogram(clust, words, jpeg=image_path)
예제 #7
0
파일: run.py 프로젝트: wz125/courses
def ColumnClustering():
  """Cluster the rotated blogdata1.txt matrix and draw wordclust.jpg.

  The dendrogram is labelled with the second value returned by
  clusters.readfile.
  """
  # Reload the clusters module before using it.
  reload(clusters)
  _, column_labels, matrix = clusters.readfile('blogdata1.txt')
  rotated = clusters.rotatematrix(matrix)
  tree = clusters.hcluster(rotated)
  clusters.drawdendrogram(tree, labels=column_labels, jpeg='wordclust.jpg')
예제 #8
0
import clusters

# Print the distance and the similarity (1 - distance) between every pair
# of word vectors from titles_vectors.txt.
docs, words, data = clusters.readfile('titles_vectors.txt')
distance_func = clusters.pearson

print()
print("Pearson coefficient between words")
# Rotated matrix: one vector per word. Assumes wordvectors[k] belongs to
# words[k] (readfile's column labels in column order) -- confirm against
# clusters.readfile.
wordvectors = clusters.rotatematrix(data)

for i in range(len(wordvectors) - 1):
    for j in range(i + 1, len(wordvectors)):
        dist = distance_func(wordvectors[i], wordvectors[j])
        sim = 1.0 - dist
        # Label each vector with the word at the same index, so the
        # printed names match the vectors that were actually compared.
        print(
            'distance between words <' + words[i] + '> and <' +
            words[j] + '>=', dist, ', and similarity =', sim)
예제 #9
0
def getKClusterRotated(inputFile, k):
    """Run k-means clustering on the rotated matrix read from inputFile.

    Returns a 3-tuple: the first two values returned by
    clusters.readfile, followed by the result of
    getNumbersToString(words, kclust) for the k clusters found.
    """
    row_names, column_names, matrix = clusters.readfile(inputFile)
    rotated = clusters.rotatematrix(matrix)
    groups = clusters.kcluster(rotated, k=k)
    # presumably turns the numeric cluster output into word strings --
    # verify against getNumbersToString
    labelled = getNumbersToString(column_names, groups)
    return row_names, column_names, labelled
예제 #10
0
def main(args):
    def usage():
        print >> sys.stderr, "Usage:"
        print >> sys.stderr, "sctoolbox correlates common_tracks [user1] [user2]"
        print >> sys.stderr, "sctoolbox correlates pearson_tastes [user1] [user2]"
        print >> sys.stderr, "sctoolbox suggest [user] bestlikes [n]"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament [n]"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament_short [n]"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament [n] --nomix"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament_short [n] --nomix"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament_playlimit [n] --nomix [playlimit]"
        print >> sys.stderr, "sctoolbox suggest [user] following_tournament_playlimit [n] [playlimit]"
        print >> sys.stderr, "sctoolbox searchUser [username]"
        print >> sys.stderr, "sctoolbox searchTrack [trackname]"
        print >> sys.stderr, "sctoolbox getTrackScore [trackname]"
        print >> sys.stderr, "sctoolbox similar [trackname]"
        print >> sys.stderr, "sctoolbox draw_style_galaxy [user] [jpg_path]"

    paths = []

    client = SCDB.register()

    ##############################################################################
    if len(args
           ) == 5 and args[1] == 'correlates' and args[2] == 'pearson_tastes':
        user1 = SCDB.searchForUser(client, args[3])
        user2 = SCDB.searchForUser(client, args[4])

        puser1 = SCDB.extractProfile(client, user1)
        puser2 = SCDB.extractProfile(client, user2)

        r = SCDB.comparePearson(puser1, puser2)

        print 'Correlation score between users (pearson):', r
    ##############################################################################

    ##############################################################################
    elif len(args
             ) == 5 and args[1] == 'correlates' and args[2] == 'common_tracks':
        user1 = SCDB.searchForUser(client, args[3])
        user2 = SCDB.searchForUser(client, args[4])

        puser1 = SCDB.extractProfile(client, user1)
        puser2 = SCDB.extractProfile(client, user2)

        r = SCDB.compareCommonTracks(puser1, puser2)

        print 'Correlation score between users (common tracks):', r
    ##############################################################################

    ##############################################################################
    elif len(args) == 5 and args[1] == 'suggest' and args[
            3] == 'following_tournament':
        print(
            'Launching tournament between tracks from followings, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client, profile,
                                                     int(args[4]))
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 5 and args[1] == 'suggest' and args[3] == 'bestlikes':
        print(
            'Rating tracks user liked, reposted, or commented, and playlisted, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.sortProfileFromFollowings(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client, profile,
                                                     int(args[4]))
        print(args[2] + " best likes are:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 5 and args[1] == 'suggest' and args[
            3] == 'following_tournament_short':
        print('Launching short tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowingsShort(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client, profile,
                                                     int(args[4]))
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 6 and args[1] == 'suggest' and args[
            3] == 'following_tournament' and args[5] == '--nomix':
        print(
            'Launching tournament between tracks from followings, might take a while...'
        )
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client,
                                                     profile,
                                                     int(args[4]),
                                                     no_mix=True)
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 6 and args[1] == 'suggest' and args[
            3] == 'following_tournament_short' and args[5] == '--nomix':
        print('Launching short tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowingsShort(client, user)
        print('Generating big profile...')
        suggestions = SCDB.getSuggestionsFromProfile(client,
                                                     profile,
                                                     int(args[4]),
                                                     no_mix=True)
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 7 and args[1] == 'suggest' and args[
            3] == 'following_tournament_playlimit' and args[5] == '--nomix':
        print('Launching custom tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client,
                                                     profile,
                                                     int(args[4]),
                                                     no_mix=True,
                                                     played_limit=int(args[6]))
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 6 and args[1] == 'suggest' and args[
            3] == ' following_tournament_playlimit ':
        print('Launching custom tournament between tracks from followings...')
        user = SCDB.searchForUser(client, args[2])
        profile = SCDB.profileFollowings(client, user)
        suggestions = SCDB.getSuggestionsFromProfile(client,
                                                     profile,
                                                     int(args[4]),
                                                     no_mix=False,
                                                     played_limit=int(args[5]))
        print(args[2] + " should like these tracks:")
        for item in suggestions:
            print item
    ##############################################################################

    ##############################################################################
    elif len(args) == 3 and args[1] == 'searchUser':
        container = client.get('/users', q=args[2])
        n = 1
        for item in container:
            print('############################')
            print('#' + str(n))
            n += 1
            print('username:'******'permalink:' + item.permalink)
        print('############################')
    ##############################################################################

    ##############################################################################
    elif len(args) == 4 and args[1] == 'draw_style_galaxy':
        print('Identifying user...')
        user = SCDB.searchForUser(client, args[2])
        print('Downloading followers list...')
        followers_list = SCDB.getFollowerList(client, user)
        followers_list = SCDB.exctractsample(followers_list)
        row, col, data = SCDB.getCommentsData(client, followers_list)
        print('Generating clusters...')
        rotdata = clusters.rotatematrix(data)
        tagclust = clusters.hcluster(rotdata)
        print("Generationg dendrogram drawing...")
        clusters.drawdendrogram(tagclust, col, jpeg=args[3])
    ##############################################################################

    ##############################################################################
    ##############################################################################

    ##############################################################################
    ##############################################################################

    ##############################################################################
    ##############################################################################

    ##############################################################################
    ##############################################################################

    else:
        usage()
예제 #11
0
def getKClusterRotated(inputFile, k):
    """Run k-means clustering on the rotated matrix read from inputFile.

    Returns a 3-tuple: the first two values returned by
    clusters.readfile, followed by the result of
    getNumbersToString(words, kclust) for the k clusters found.
    """
    row_names, column_names, matrix = clusters.readfile(inputFile)
    rotated = clusters.rotatematrix(matrix)
    groups = clusters.kcluster(rotated, k=k)
    # presumably turns the numeric cluster output into word strings --
    # verify against getNumbersToString
    labelled = getNumbersToString(column_names, groups)
    return row_names, column_names, labelled