コード例 #1
0
def compute_silhouette_block_scales(X, ward, scales, metric_measure, verbose=False):
    """Return the mean and std of block silhouette scores over several cuts.

    For every cut height in ``scales`` the tree is cut with
    ``ward.cut(height)``; the number of clusters found is converted into a
    flat labelling with ``_hc_cut`` and that labelling is scored with
    ``silhouette_score_block`` (``sample_size=5000``, ``n_jobs=2``).

    Parameters
    ----------
    X : data passed unchanged to ``silhouette_score_block``.
    ward : tree object exposing ``cut(height)``, ``children_`` and
        ``n_leaves_``.
    scales : sequence of cut heights; one score is produced per entry.
    metric_measure : forwarded as ``metric=`` to ``silhouette_score_block``
        (presumably a metric name such as 'euclidean' -- confirm against
        that helper).
    verbose : when truthy, progress, timings and scores are printed.

    Returns
    -------
    (mean, std) : mean and standard deviation of the per-scale scores.
    """
    # Every print call receives one pre-formatted string so the output is
    # the same under the Python 2 print statement and the Python 3 function.
    if verbose:
        print("Computing silhouette score :")
        print("Scales :  %s" % (scales,))

    iterations = 1
    sil = np.zeros(len(scales))
    for i, height_cut in enumerate(scales):
        if verbose:
            print(".....scales:  %s" % (height_cut,))
        num_clusters = len(ward.cut(height_cut))
        label = _hc_cut(num_clusters, ward.children_, ward.n_leaves_)

        temp = np.zeros(iterations)
        for run in range(iterations):
            st = cpu_time()
            score = silhouette_score_block(
                X, label, metric=metric_measure, sample_size=5000, n_jobs=2
            )
            # The elapsed time gets its own name: reusing the loop index for
            # it made the score land at a float index instead of slot `run`.
            elapsed = cpu_time() - st
            if verbose:
                print("......time %s" % (elapsed,))
            temp[run] = score

        sil[i] = temp.mean()
        if verbose:
            # Reports the score of the last repetition for this scale.
            print(".....score:  %s" % (score,))

    return sil.mean(), sil.std()
コード例 #2
0
def compute_silhouette_score(X, tree, metric_measure):
    """Compute the silhouette score of every k-cluster cut of ``tree``.

    With n = len(X), the tree is cut into k clusters for each k = 2 .. n
    using ``_hc_cut`` and the resulting labelling is scored with
    ``metrics.silhouette_score``. k starts at 2 because the silhouette
    score cannot be calculated for a single cluster.

    Parameters
    ----------
    X : data; ``len(X)`` gives the number of samples.
    tree : ward tree exposing ``children_`` and ``n_leaves_``.
    metric_measure : forwarded as ``metric=`` to
        ``metrics.silhouette_score`` (e.g. 'euclidean').

    Returns
    -------
    score : 1-D float array of length n - 1; ``score[k - 2]`` is the
        silhouette score of the partition with k clusters.
    """
    n = len(X)
    score = np.zeros(n - 1)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print('Length :  %s' % (n,))

    # NOTE(review): if ``metrics`` is scikit-learn's module, its docs limit
    # the number of labels to 2 .. n_samples - 1, so the final k == n cut
    # may be rejected -- confirm against the installed version.
    for k in range(2, n + 1):
        print('\n Cutting at k =  %s' % (k,))
        label = _hc_cut(k, tree.children_, tree.n_leaves_)
        print('\n Compute score ...')
        # NOTE: a block-wise variant (silhouette_score_block with
        # sample_size=None) was previously tried here in place of this call.
        score[k - 2] = metrics.silhouette_score(X, label, metric=metric_measure)

    # The original filled ``score`` but never handed it back to the caller.
    return score
コード例 #3
0
def compute_silhouette_block_tree(X, ward, metric_measure, verbose=False):
    """Return the mean and std of block silhouette scores over all cuts.

    The number of cuts is read from ``ward.height_`` at index
    ``len(ward.children_) + ward.n_leaves_ - 1`` (presumably the root node --
    confirm against the tree implementation). For each cut height
    0, 1, ..., the tree is cut with ``ward.cut(height)``, the cluster count
    is converted into a flat labelling with ``_hc_cut`` and that labelling
    is scored four times with ``silhouette_score_block``
    (``sample_size=50000``, ``n_jobs=2``); the four scores are averaged.

    Parameters
    ----------
    X : data passed unchanged to ``silhouette_score_block``.
    ward : tree object exposing ``height_``, ``cut(height)``, ``children_``
        and ``n_leaves_``.
    metric_measure : forwarded as ``metric=`` to ``silhouette_score_block``.
    verbose : when truthy, progress and scores are printed.

    Returns
    -------
    (mean, std) : mean and standard deviation of the per-cut averages.
    """
    # Extract all cuts of the hierarchy.
    num_cuts = ward.height_[len(ward.children_) + ward.n_leaves_ - 1]
    # Every print call receives one pre-formatted string so the output is
    # the same under the Python 2 print statement and the Python 3 function.
    if verbose:
        print("Computing silhouette score :")
        print("Height :  %s" % (num_cuts,))

    iterations = 4
    cut_heights = np.arange(num_cuts)
    # Size and index the result from the generated cut list so positions
    # stay integral even if the stored height is a float.
    sil = np.zeros(len(cut_heights))
    for idx, height_cut in enumerate(cut_heights):
        if verbose:
            print(".....scales:  %s" % (height_cut,))
        num_clusters = len(ward.cut(height_cut))
        label = _hc_cut(num_clusters, ward.children_, ward.n_leaves_)

        # The original allocated this with a misspelled name
        # (`interations`), which raised NameError on every call.
        temp = np.zeros(iterations)
        for run in range(iterations):
            score = silhouette_score_block(
                X, label, metric=metric_measure, sample_size=50000, n_jobs=2
            )
            temp[run] = score

        sil[idx] = temp.mean()
        if verbose:
            # Reports the score of the last repetition for this cut.
            print(".....score:  %s" % (score,))

    return sil.mean(), sil.std()