# NOTE: this file defines reCluster a second time further down; the later
# definition is the one bound to the name once the module has loaded.
def reCluster(clusterList):
    '''
    Reassign every tag in TAG_DICT to the cluster that clusterMatch scores
    lowest for it, then recompute every centroid

    Param: clusterList - a list of every cluster to be reclustered
    Return: clusterList - the same list object, with members reassigned and
                          setCentroid() called on each cluster
            avg - the summed score (tup[1], treated as a distance) of the
                  assigned tags divided by the number of tags visited;
                  0.0 when no tag was visited
    '''
    # Membership is rebuilt from scratch, so clear every cluster first.
    for cluster in clusterList:
        cluster.wipeMembers()

    totalDistance = 0
    iterCount = 0
    for tag, vector in TAG_DICT.items():
        # min() returns the first entry with the smallest tup[1], which is
        # the entry a stable ascending sort would place at position 0.
        matchTuple = min(clusterMatch(clusterList, vector),
                         key=lambda tup: tup[1])
        bestIndex = matchTuple[0]
        # NOTE(review): the strict `> 0` means the cluster at index 0 never
        # receives members here; confirm against clusterMatch whether
        # `>= 0` was intended.
        if bestIndex > 0:
            totalDistance += matchTuple[1]
            clusterList[bestIndex].addMember(tag, vector)
        # Counts every tag visited, including tags that were not assigned.
        iterCount += 1

    # Guard the division: with no tags visited there is nothing to average.
    avg = totalDistance / iterCount if iterCount else 0.0

    for cluster in clusterList:
        cluster.setCentroid()
    return clusterList, avg
# NOTE: an earlier definition of reCluster exists above in this file; this
# later one replaces it when the module is loaded.
def reCluster(clusterList):
    '''
    Reassign every tag in TAG_DICT to the cluster that clusterMatch scores
    lowest for it, then recompute every centroid

    Param: clusterList - a list of every cluster to be reclustered
    Return: clusterList - the same list object, with members reassigned and
                          setCentroid() called on each cluster
            avg - the summed score (tup[1], treated as a distance) of the
                  assigned tags divided by the number of tags visited;
                  0.0 when no tag was visited
    '''
    # Membership is rebuilt from scratch, so clear every cluster first.
    for cluster in clusterList:
        cluster.wipeMembers()

    distanceSum = 0
    iterCount = 0
    for tag, vector in TAG_DICT.items():
        # Only the lowest-scoring candidate is needed, so take the minimum
        # directly instead of sorting the whole candidate list.
        matchTuple = min(clusterMatch(clusterList, vector),
                         key=lambda tup: tup[1])
        bestIndex = matchTuple[0]
        # NOTE(review): the strict `> 0` means the cluster at index 0 never
        # receives members here; confirm against clusterMatch whether
        # `>= 0` was intended.
        if bestIndex > 0:
            distanceSum += matchTuple[1]
            clusterList[bestIndex].addMember(tag, vector)
        # Counts every tag visited, including tags that were not assigned.
        iterCount += 1

    # Avoid ZeroDivisionError when TAG_DICT is empty.
    if iterCount:
        avg = distanceSum / iterCount
    else:
        avg = 0.0

    for cluster in clusterList:
        cluster.setCentroid()
    return clusterList, avg
def randomCluster(numClusters, tagDict):
    '''
    Build numClusters fresh TagCluster objects and place each entry of
    tagDict into one of them, chosen uniformly at random

    Param: numClusters - how many TagCluster objects to create
           tagDict - mapping whose keys and values are handed to
                     addMember(key, value)
    Return: a list of numClusters TagCluster objects, each with
            setCentroid() already called
    '''
    clusters = []
    for _ in range(numClusters):
        clusters.append(tumblruser.TagCluster())

    for tag in tagDict:
        # randint is inclusive on both ends, hence the "- 1".
        clusters[random.randint(0, numClusters - 1)].addMember(tag, tagDict[tag])

    for group in clusters:
        group.setCentroid()
    return clusters