def fast_clustering(coords, max_rmsd=30):
    """Greedy leader clustering of ``coords`` plus a within-cluster RMSD matrix.

    A diagnostic sweep over several thresholds is printed first (preserved
    from the original), then the clustering computed at ``max_rmsd`` is kept.
    The original hard-coded 30 by placing it last in the sweep list; that is
    now an explicit, backward-compatible parameter.

    :param coords: sequence of coordinate sets comparable via ``ftur.rmsd``
                   (assumed -- confirm against ``ftur``'s API)
    :param max_rmsd: RMSD threshold used for the returned clustering
                     (default 30, matching the original behavior)
    :return: ``(i_to_clust, dists)`` where ``i_to_clust[i]`` is the cluster
             index of ``coords[i]`` and ``dists`` holds pairwise RMSDs within
             a cluster and ``inf`` across clusters.
             NOTE(review): only the upper triangle ``dists[i, j]`` with
             ``i < j`` is filled; the lower triangle stays 0, as in the
             original.
    """
    def _leader_cluster(threshold):
        # Leader clustering: the first member of each cluster is its
        # representative; a point joins the first cluster whose leader is
        # within `threshold`, otherwise it founds a new cluster.
        clusters = []
        assignment = []
        for i, coord in enumerate(coords):
            for j, cluster in enumerate(clusters):
                if ftur.rmsd(coord, coords[cluster[0]]) < threshold:
                    cluster.append(i)
                    assignment.append(j)
                    break
            else:
                clusters.append([i])
                assignment.append(len(clusters) - 1)
        print("{} maxRMSD, {} clusters".format(threshold, len(clusters)))
        return assignment

    # Diagnostic sweep (print-only). The original recomputed the clustering
    # for every threshold and silently kept the last one (30).
    for threshold in [10, 15, 20, 25, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80]:
        _leader_cluster(threshold)
    i_to_clust = _leader_cluster(max_rmsd)

    print("Starting dists")
    dists = np.zeros((len(coords), len(coords)))
    for i, j in it.combinations(range(len(coords)), 2):
        if i_to_clust[i] == i_to_clust[j]:
            dists[i, j] = ftur.rmsd(coords[i], coords[j])
        else:
            # Cross-cluster pairs are marked unreachable.
            dists[i, j] = float('inf')
    return i_to_clust, dists
def topN_closest_cluster_nodes(self, n, img_index):
    """Return the ``n`` nearest neighbours of ``img_index`` within its cluster.

    :param n: number of neighbours to return
    :param img_index: index of the query sample in ``self.data``/``self.labels``
    :return: list of ``(index, euclidean_distance)`` pairs, ascending by
             distance, excluding the query sample itself
    """
    query_cluster = self.labels[img_index]
    # enumerate() replaces the original hand-rolled `i = 0 ... i += 1` counter.
    members = [i for i, label in enumerate(self.labels)
               if label == query_cluster and i != img_index]
    # Distance from the query sample to every other member of its cluster
    # (dict comprehension instead of dict(map(lambda ...))).
    dist = {i: distance.euclidean(self.data[img_index], self.data[i])
            for i in members}
    return sorted(dist.items(), key=itemgetter(1))[:n]
def outwrite(vocab, labels, outpath):
    """Write the clustering to ``outpath``, one line per cluster label.

    Each line has the form ``Cluster <label>:\\t<word> <word> ... `` (every
    word is followed by a single space, as in the original output format).

    :param vocab: sequence of words, parallel to ``labels``
    :param labels: per-word cluster labels; entries may be scalars or
        indexable objects whose first element is the label
    :param outpath: path of the text file to (over)write
    """
    # Flatten labels: some callers apparently pass one-element containers.
    flat = []
    for lab in labels:
        try:
            flat.append(lab[0])
        except (TypeError, IndexError):  # scalar label; was a bare `except`
            flat.append(lab)
    # NOTE(review): the membership test below compares the *original* label
    # entries against the flattened ones, exactly as the original did --
    # if labels really are containers, no word ever matches. Confirm intent.
    #
    # `with` guarantees the file is closed (the original leaked the handle);
    # zip() replaces the Python-2-only itertools.izip; sorted() makes the
    # line order deterministic instead of set-iteration order.
    with open(outpath, 'w') as outfile:
        for lab in sorted(set(flat)):
            words = ''.join(word + ' '
                            for word, label in zip(vocab, labels)
                            if label == lab)
            outfile.write("Cluster " + str(lab) + ":\t" + words + '\n')
def find_boundarieskmeans(binarylist, numclusters, clusterimportance):
    '''
    K-means-style search for segment boundaries over a label sequence.

    Each position j contributes a 2-D point [label, j]; the custom metric is
    the positional distance plus a fixed penalty when the labels differ.

    :param binarylist: list of clustering labels
    :param numclusters: number of clusters to identify
    :param clusterimportance: mu as discussed in thesis, normally expected segmentsize*0.5
    :return:boundaries detected using kboundaries
    '''
    def customdifference(point1, point2):
        # Distance = positional gap, plus `clusterimportance` if the labels
        # (first components) disagree.
        if point1[0] == point2[0]:
            modifier = 0
        else:
            modifier = clusterimportance
        return abs(point1[1] - point2[1]) + modifier

    # NOTE(review): this is float division on Python 3; the surrounding file
    # reads as Python 2 (integer division presumably intended) -- confirm.
    size = len(binarylist) / numclusters
    centers = [
        (i, size * i + size / 2) for i in range(numclusters)
    ]  #Initial guess is evenly spread centroids. This should conform to our expectation
    for i in range(25):  # fixed number of assignment/update iterations
        # NOTE(review): the comprehension variable `i` shadows the outer
        # iteration counter -- harmless here but easy to misread.
        clusters = [([], centers[i]) for i in range(numclusters)]
        for j in range(len(binarylist)):
            value = binarylist[j]
            point = [value, j]
            bestcenter = min(
                centers, key=lambda x: customdifference(point, x))  #Allocation step
            # Append the point to the cluster whose center won the allocation.
            for cluster, center in clusters:
                if center == bestcenter:
                    cluster.append(point)
        #Updating center
        centers = []
        for cluster, center in clusters:
            if len(cluster) > 0:
                clustervalue = np.mean([x[0] for x in cluster])
                numericcenter = np.mean([x[1] for x in cluster])
            else:
                # Empty cluster: re-seed with a random label and position.
                # NOTE(review): round(random()*numclusters) can yield a value
                # equal to numclusters, outside the 0..numclusters-1 label
                # range -- verify this is acceptable.
                clustervalue = int(round(random.random() * numclusters))
                numericcenter = random.random() * len(binarylist)
            centers.append([int(round(clustervalue)), numericcenter])
    #Here we get boundaries from the centers
    boundaries = boundariesfromcenters(clusters)
    return boundaries
def fast_clustering(coords):
    """Leader clustering of ``coords`` and a within-cluster RMSD matrix.

    For each threshold in a fixed sweep, greedily assigns every coordinate
    set to the first cluster whose representative (first member) is within
    the threshold, printing the resulting cluster count; the clustering from
    the final threshold (30) is kept.  Returns ``(i_to_clust, dists)`` where
    ``dists`` has pairwise RMSDs in its upper triangle for same-cluster
    pairs and ``inf`` for cross-cluster pairs.
    """
    thresholds = [10, 15, 20, 25, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 30]
    for threshold in thresholds:
        clusters = []
        i_to_clust = []
        for idx in range(len(coords)):
            assigned = None
            for c_idx, members in enumerate(clusters):
                # The first member of each cluster acts as its representative.
                if ftur.rmsd(coords[idx], coords[members[0]]) < threshold:
                    assigned = c_idx
                    break
            if assigned is None:
                clusters.append([idx])
                i_to_clust.append(len(clusters) - 1)
            else:
                clusters[assigned].append(idx)
                i_to_clust.append(assigned)
        print("{} maxRMSD, {} clusters".format(threshold, len(clusters)))
    print("Starting dists")
    total = len(coords)
    dists = np.zeros((total, total))
    for a, b in it.combinations(range(total), 2):
        if i_to_clust[a] == i_to_clust[b]:
            dists[a, b] = ftur.rmsd(coords[a], coords[b])
        else:
            dists[a, b] = float('inf')
    return i_to_clust, dists
def filter_isolated_idxs_new(idxs=(), maxdist=3.0):
    """Clear isolated flags: an index stays flagged only when it belongs to a
    consecutive run of two or more flagged indices.

    :param idxs: array-like of boolean / 0-1 flags
    :param maxdist: unused; kept for backward compatibility with callers
    :return: numpy array shaped like ``idxs`` with isolated flags zeroed
    """
    newidxs = np.zeros_like(idxs)
    runs = []       # completed runs of consecutive truthy indices
    current = []    # run currently being collected
    for i in range(len(idxs)):  # range() works on both Python 2 and 3
        if idxs[i]:
            current.append(i)
        elif current:
            runs.append(current)
            current = []
    # BUG FIX: the original tested `i == len(idxs)`, which can never be true
    # inside the loop, so a run ending at the last index was silently lost.
    if current:
        runs.append(current)
    for run in runs:
        if len(run) > 1:  # runs of length 1 are the "isolated" ones
            newidxs[run] = True
    return newidxs
def _expand_cluster(self, sample_i, neighbors): cluster = [sample_i] # Iterate through neighbors for neighbor_i in neighbors: if not neighbor_i in self.visited_samples: self.visited_samples.append(neighbor_i) # Fetch the samples distant neighbors self.neighbors[neighbor_i] = self._get_neighbors(neighbor_i) # Make sure the neighbors neighbors are more than min_samples if len(self.neighbors[neighbor_i]) >= self.min_samples: # Choose neighbors of neighbor except for sample distant_neighbors = self.neighbors[neighbor_i][np.where( self.neighbors[neighbor_i] != sample_i)] # Add the neighbors neighbors as neighbors of sample self.neighbors[sample_i] = np.concatenate( (self.neighbors[sample_i], distant_neighbors)) # Expand the cluster from the neighbor expanded_cluster = self._expand_cluster( neighbor_i, self.neighbors[neighbor_i]) # Add expanded cluster to this cluster cluster = cluster + expanded_cluster if not neighbor_i in np.array(self.clusters): cluster.append(neighbor_i) return cluster
# Welcher Datensatz das Cluster-Zentrum bildet centers_indices = aff_prop_cluster.cluster_centers_indices_ print "--labels_---------------" print [ lab for lab in labels ] print "-- cluster_centers_indices_ -------" print [i for i in centers_indices] cluster = [] for c in centers_indices: cluster.append([]) for i, label in enumerate(labels): #print names[i]," : ", label cluster[label].append( [ i, symbols[i], names[i]] ) print "cluster" print cluster for j, cl in enumerate(cluster): # Bessere Ansicht # plt.figure()