def compute_silhouette_block_scales(X, ward, scales, metric_measure, verbose=False):
    """Return the mean and std of the block silhouette score over ``scales``.

    For every cut height in ``scales`` the tree is cut with ``ward.cut``; the
    number of pieces returned is used as the cluster count for ``_hc_cut``,
    and the resulting labelling is scored with ``silhouette_score_block``
    (called with ``sample_size=5000`` and ``n_jobs=2``).

    Parameters
    ----------
    X
        Data forwarded unchanged to ``silhouette_score_block``.
    ward
        Tree object exposing ``cut(height)``, ``children_`` and ``n_leaves_``.
    scales
        Sequence of cut heights; one score is computed per entry.
    metric_measure
        Forwarded as the ``metric`` argument of ``silhouette_score_block``.
    verbose
        When truthy, progress, timing and scores are printed to stdout.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-scale scores.
    """
    # Every print below passes one pre-formatted string so that the emitted
    # text is the same whether the module runs on Python 2 or Python 3.
    if verbose:
        print("Computing silhouette score :")
        print("%s %s" % ("Scales : ", scales))

    # Number of repeated evaluations averaged for each scale.
    iterations = 1
    sil = np.zeros(len(scales))

    for i, height_cut in enumerate(scales):
        if verbose:
            print("%s %s" % (".....scales: ", height_cut))

        num_clusters = len(ward.cut(height_cut))
        label = _hc_cut(num_clusters, ward.children_, ward.n_leaves_)

        temp = np.zeros(iterations)
        for t in range(iterations):
            st = cpu_time()
            score = silhouette_score_block(X, label, metric=metric_measure,
                                           sample_size=5000, n_jobs=2)
            # Keep the timing in its own variable: reusing the loop counter
            # for it would turn the index used just below into a float.
            elapsed = cpu_time() - st
            if verbose:
                print("%s %s" % ("......time", elapsed))
            temp[t] = score

        sil[i] = temp.mean()
        if verbose:
            # Reports the last individual evaluation, not the average.
            print("%s %s" % (".....score: ", score))

    return sil.mean(), sil.std()
def compute_silhouette_score(X, tree, metric_measure):
    """Compute the silhouette score of every flat partition of ``tree``.

    With ``n = len(X)``, the tree is cut into ``k`` clusters for each
    ``k = 2 .. n`` via ``_hc_cut`` and the resulting labelling is scored with
    ``metrics.silhouette_score``.  ``k = 1`` is skipped because a silhouette
    score cannot be computed for a single cluster.

    Parameters
    ----------
    X
        Data; its length fixes the number of partitions evaluated.
    tree
        Tree object exposing ``children_`` and ``n_leaves_``.
    metric_measure
        Forwarded as the ``metric`` argument (e.g. ``'euclidean'``).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n - 1``; entry ``k - 2`` holds the score
        of the partition with ``k`` clusters.
    """
    # NOTE(review): the loop goes up to k == n (one cluster per sample).
    # Some silhouette implementations reject that labelling -- confirm it is
    # accepted by the `metrics` module this file imports.
    n = len(X)
    score = np.zeros(n - 1)

    # Each print passes one pre-formatted string so that the emitted text is
    # the same whether the module runs on Python 2 or Python 3.
    print("%s %s" % ('Length : ', n))

    for k in range(2, n + 1):
        print("%s %s" % ('\n Cutting at k = ', k))
        label = _hc_cut(k, tree.children_, tree.n_leaves_)

        print('\n Compute score ...')
        score[k - 2] = metrics.silhouette_score(X, label, metric=metric_measure)

    # The scores were previously computed and then discarded; hand them back.
    return score
def compute_silhouette_block_tree(X, ward, metric_measure, verbose=False):
    """Return the mean and std of the block silhouette score over all cuts.

    The number of cuts is read from ``ward.height_`` at index
    ``len(ward.children_) + ward.n_leaves_ - 1``.  For every cut height
    ``0 .. num_cuts - 1`` the tree is cut with ``ward.cut``; the number of
    pieces returned is used as the cluster count for ``_hc_cut``, and the
    labelling is scored four times with ``silhouette_score_block``
    (``sample_size=50000``, ``n_jobs=2``).  The four scores are averaged.

    Parameters
    ----------
    X
        Data forwarded unchanged to ``silhouette_score_block``.
    ward
        Tree object exposing ``height_``, ``cut(height)``, ``children_`` and
        ``n_leaves_``.
    metric_measure
        Forwarded as the ``metric`` argument of ``silhouette_score_block``.
    verbose
        When truthy, progress and scores are printed to stdout.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-cut averaged scores.
    """
    # Extract all cuts of the hierarchy.
    # NOTE(review): presumably this index is the root node, so the value is
    # the total tree height; it is used both as an array size and as the
    # bound of the cut heights, so it is assumed to be integral -- confirm.
    num_cuts = ward.height_[len(ward.children_) + ward.n_leaves_ - 1]

    # Every print below passes one pre-formatted string so that the emitted
    # text is the same whether the module runs on Python 2 or Python 3.
    if verbose:
        print("Computing silhouette score :")
        print("%s %s" % ("Height : ", num_cuts))

    # Number of repeated evaluations averaged for each cut.
    iterations = 4
    sil = np.zeros(num_cuts)

    for height_cut in np.arange(num_cuts):
        if verbose:
            print("%s %s" % (".....scales: ", height_cut))

        num_clusters = len(ward.cut(height_cut))
        label = _hc_cut(num_clusters, ward.children_, ward.n_leaves_)

        temp = np.zeros(iterations)
        for t in range(iterations):
            score = silhouette_score_block(X, label, metric=metric_measure,
                                           sample_size=50000, n_jobs=2)
            temp[t] = score

        sil[height_cut] = temp.mean()
        if verbose:
            # Reports the last individual evaluation, not the average.
            print("%s %s" % (".....score: ", score))

    return sil.mean(), sil.std()