def Get_Best_Centroids(k, iterations):
    ''' Cluster the MFCCs `iterations` times and keep the best-scoring run.

    Each run calls kmeans.scipy_kmeans with `k` clusters, assigns the samples
    with kmeans.scipy_vq and scores the result with calcJ.  The run with the
    highest score wins; ties keep the earliest run.

    k          -- number of clusters passed to kmeans.scipy_kmeans
    iterations -- number of independent k-means runs to try

    Returns (centroids, distortion) of the winning run.  When `iterations`
    is 0 no run takes place and the placeholder pair (0, 0) is returned.
    '''
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    feature_holder = featurevector.feature_holder(
        filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    j_measures = np.zeros(iterations)
    best_score = None
    best_centroids = 0
    best_distortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # The first run always seeds the result.  Comparing against a fixed 0
        # would hand back the integer placeholders whenever no score is
        # positive, even though clustering actually ran.
        if i == 0 or j_measures[i] > best_score:
            best_score = j_measures[i]
            best_centroids = centroids
            best_distortion = distortion
    return best_centroids, best_distortion
def feature_selection(args):
    ''' Sweep k over a range, cluster the MFCCs for each k and plot the outcome.

    args -- object exposing k_min, k_max and k_hop, used as the start, stop
            and step of range(); the stop value itself is therefore not run.

    For every k this records (k, distortion, dist, J) from
    kmeans.scipy_kmeans, kmeans.scipy_vq and calcJ, then hands the collected
    list to plot.plot_feature_selection.  Nothing is returned.
    '''
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    k_first, k_stop, k_step = args.k_min, args.k_max, args.k_hop
    mfccs = holder.get_feature('mfcc')
    sample_count = len(mfccs)
    print("%s %s" % ("N MFCCS:", sample_count))

    sweep = []
    for k in range(k_first, k_stop, k_step):
        print("Running k-Means with k=%d" % (k))
        # Only a warning: clustering is still attempted for this k.
        if sample_count <= k:
            print("WARNING! k is greater than the number of samples!")
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, dist = kmeans.scipy_vq(mfccs, centroids)
        j_value = calcJ(mfccs, classes, centroids, k)
        sweep.append((k, distortion, dist, j_value))

    plot.plot_feature_selection(k_first, k_stop, k_step, sweep)
def Get_Best_Centroids(k, iterations):
    ''' Cluster the MFCCs `iterations` times and keep the best-scoring run.

    Each run calls kmeans.scipy_kmeans with `k` clusters, assigns the samples
    with kmeans.scipy_vq and scores the result with calcJ.  The run with the
    highest score wins; ties keep the earliest run.

    k          -- number of clusters passed to kmeans.scipy_kmeans
    iterations -- number of independent k-means runs to try

    Returns (centroids, distortion) of the winning run.  When `iterations`
    is 0 no run takes place and the placeholder pair (0, 0) is returned.
    '''
    feature_holder = featurevector.feature_holder(
        filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    # Per-run scores are kept so the disabled plot below can be re-enabled.
    j_measures = np.zeros(iterations)
    best_score = None
    best_centroids = 0
    best_distortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # The first run always seeds the result.  Comparing against a fixed 0
        # would hand back the integer placeholders whenever no score is
        # positive, even though clustering actually ran.
        if i == 0 or j_measures[i] > best_score:
            best_score = j_measures[i]
            best_centroids = centroids
            best_distortion = distortion

    # Disabled debugging aid: plot the score of every run.
    # plt.close()
    # fig, (ax1) = plt.subplots(1)
    # ax1.plot(j_measures)
    # ax1.set_title("J measures over multiple iterations of k")
    # ax1.set_xlabel("iterations")
    # ax1.set_ylabel("J-measure values")
    # plt.show()
    return best_centroids, best_distortion
def Get_Best_Centroids(k, iterations):
    ''' Repeat k-means `iterations` times and return the best-scoring result.

    Every repetition runs kmeans.scipy_kmeans with `k` clusters, labels the
    samples through kmeans.scipy_vq and scores the labelling with calcJ.
    The highest score wins; on a tie the earlier repetition is kept.

    k          -- number of clusters passed to kmeans.scipy_kmeans
    iterations -- how many independent k-means repetitions to try

    Returns (centroids, distortion) of the winning repetition.  With
    `iterations` equal to 0 nothing runs and the placeholder pair (0, 0)
    is returned.
    '''
    feature_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    # Per-run scores are kept so the disabled plot below can be re-enabled.
    j_measures = np.zeros(iterations)
    top_score = None
    top_centroids = 0
    top_distortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # Seed from the first repetition instead of comparing against 0, so a
        # completed clustering is never discarded in favour of the integer
        # placeholders when all scores happen to be non-positive.
        if i == 0 or j_measures[i] > top_score:
            top_score = j_measures[i]
            top_centroids = centroids
            top_distortion = distortion

    # Disabled debugging aid: plot the score of every repetition.
    # plt.close()
    # fig, (ax1) = plt.subplots(1)
    # ax1.plot(j_measures)
    # ax1.set_title("J measures over multiple iterations of k")
    # ax1.set_xlabel("iterations")
    # ax1.set_ylabel("J-measure values")
    # plt.show()
    return top_centroids, top_distortion
def Get_Best_Centroids(k, iterations):
    ''' Repeat k-means `iterations` times and return the best-scoring result.

    Every repetition runs kmeans.scipy_kmeans with `k` clusters, labels the
    samples through kmeans.scipy_vq and scores the labelling with calcJ.
    The highest score wins; on a tie the earlier repetition is kept.

    k          -- number of clusters passed to kmeans.scipy_kmeans
    iterations -- how many independent k-means repetitions to try

    Returns (centroids, distortion) of the winning repetition.  With
    `iterations` equal to 0 nothing runs and the placeholder pair (0, 0)
    is returned.
    '''
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    feature_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    j_measures = np.zeros(iterations)
    top_score = None
    top_centroids = 0
    top_distortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # Seed from the first repetition instead of comparing against 0, so a
        # completed clustering is never discarded in favour of the integer
        # placeholders when all scores happen to be non-positive.
        if i == 0 or j_measures[i] > top_score:
            top_score = j_measures[i]
            top_centroids = centroids
            top_distortion = distortion
    return top_centroids, top_distortion
def feature_selection(args):
    ''' Run clustering for a range of k values and plot how each one scores.

    args -- object providing k_min, k_max and k_hop; they are passed to
            range() as start, stop and step, so k_max itself is excluded.

    Each k yields one tuple (k, distortion, dist, J) built from
    kmeans.scipy_kmeans, kmeans.scipy_vq and calcJ.  The tuples are passed
    to plot.plot_feature_selection; the function returns nothing.
    '''
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    feature_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    lowest_k = args.k_min
    upper_k = args.k_max
    hop = args.k_hop
    mfccs = feature_holder.get_feature('mfcc')
    total = len(mfccs)
    print("%s %s" % ("N MFCCS:", total))

    def evaluate(k):
        # One k-means run for this k, reduced to the tuple the plot expects.
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, dist = kmeans.scipy_vq(mfccs, centroids)
        return (k, distortion, dist, calcJ(mfccs, classes, centroids, k))

    results = []
    for k in range(lowest_k, upper_k, hop):
        print("Running k-Means with k=%d" % (k))
        # The warning does not skip the run; clustering is still attempted.
        if not k < total:
            print("WARNING! k is greater than the number of samples!")
        results.append(evaluate(k))

    plot.plot_feature_selection(lowest_k, upper_k, hop, results)