def Get_Best_Centroids(k, iterations):
    """Run k-means several times and keep the run with the highest J-measure.

    The MFCC features are loaded from FEATURE_VECTOR_FILENAME on every call.
    Each iteration clusters them with ``kmeans.scipy_kmeans``, assigns the
    samples to the resulting centroids with ``kmeans.scipy_vq`` and scores
    that assignment with ``calcJ``.

    Args:
        k: forwarded to ``kmeans.scipy_kmeans`` and ``calcJ`` (presumably the
            number of clusters -- confirm against those helpers).
        iterations: number of independent k-means runs to try.

    Returns:
        ``(centroids, distortion)`` of the best-scoring run, as returned by
        ``kmeans.scipy_kmeans``.  With ``iterations == 0`` no run takes place
        and the placeholder ``(0, 0)`` is returned.
    """
    # NOTE(review): this banner mentions "multiple k's" although the function
    # works on a single k; the text is kept verbatim.
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    feature_holder = featurevector.feature_holder(
        filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    j_measures = np.zeros(iterations)
    best_j = None  # stays None until the first run has been scored
    bestCentroids = 0
    bestDistortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # Always accept the first run and replace a NaN incumbent.  The
        # previous start value of 0 meant that the (0, 0) placeholders were
        # returned whenever no score was strictly positive.
        if best_j is None or np.isnan(best_j) or j_measures[i] > best_j:
            best_j = j_measures[i]
            bestCentroids = centroids
            bestDistortion = distortion
    return bestCentroids, bestDistortion
def clustering(args):
    """Cluster the MFCC features for a single k and plot the result.

    Reads ``args.k``, ``args.plot_segments`` and ``args.write_audio_results``.
    Centroids come from ``Get_Best_Centroids(k, 1)``; the samples are then
    assigned to them with ``kmeans.scipy_vq``.  Depending on the flags the
    per-class waveform plot and/or audio output is produced, and finally
    everything is handed to ``plot.plot``.
    """
    print("Feature Analysis/Clustering Mode: single k")
    mfcc_store = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    sone_store = featurevector.feature_holder(filename=SONE_VECTOR_FILENAME)
    n_clusters = args.k

    print(mfcc_store)
    mfcc_frames = mfcc_store.get_feature('mfcc')
    print(sone_store)
    sone_frames = sone_store.get_feature('sones')

    best_centroids, best_distortion = Get_Best_Centroids(n_clusters, 1)
    print("Distortion for this run: %0.3f" % best_distortion)
    labels, _ = kmeans.scipy_vq(mfcc_frames, best_centroids)

    # Distance matrix between the class centroids.
    centroid_distances = mir_utils.GetSquareDistanceMatrix(best_centroids)
    event_starts = mfcc_store.get_event_start_indecies()

    # Optional outputs, selected on the command line.
    if args.plot_segments:
        PlotWaveformWClasses(n_clusters, mfcc_store, labels)
    if args.write_audio_results:
        WriteAudioFromClasses(n_clusters, mfcc_store, labels)

    plot.plot(mfcc_frames, sone_frames, event_starts, best_centroids,
              centroid_distances, labels)
def feature_selection(args):
    """Cluster the MFCC features once per k over a range and plot the scores.

    k runs over ``range(args.k_min, args.k_max, args.k_hop)``, so ``k_max``
    itself is excluded.  For every k one ``kmeans.scipy_kmeans`` run is made,
    the samples are assigned with ``kmeans.scipy_vq`` and the assignment is
    scored with ``calcJ``.  The collected ``(k, distortion, dist, J)`` tuples
    are passed to ``plot.plot_feature_selection``.
    """
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    k_start, k_stop, k_step = args.k_min, args.k_max, args.k_hop
    mfcc_frames = holder.get_feature('mfcc')
    sample_count = len(mfcc_frames)
    # Single-argument form so the output is the same under Python 2 and 3.
    print("%s %s" % ("N MFCCS:", sample_count))

    sweep = []
    for k in range(k_start, k_stop, k_step):
        print("Running k-Means with k=%d" % k)
        if sample_count <= k:
            # Only a warning: the run below is still attempted.
            print("WARNING! k is greater than the number of samples!")
        centroids, distortion = kmeans.scipy_kmeans(mfcc_frames, k)
        labels, sample_dists = kmeans.scipy_vq(mfcc_frames, centroids)
        j_score = calcJ(mfcc_frames, labels, centroids, k)
        sweep.append((k, distortion, sample_dists, j_score))

    plot.plot_feature_selection(k_start, k_stop, k_step, sweep)
def Get_Best_Centroids(k, iterations):
    """Run k-means several times and keep the run with the highest J-measure.

    The MFCC features are loaded from FEATURE_VECTOR_FILENAME on every call.
    Each iteration clusters them with ``kmeans.scipy_kmeans``, assigns the
    samples to the resulting centroids with ``kmeans.scipy_vq`` and scores
    that assignment with ``calcJ``.

    Args:
        k: forwarded to ``kmeans.scipy_kmeans`` and ``calcJ`` (presumably the
            number of clusters -- confirm against those helpers).
        iterations: number of independent k-means runs to try.

    Returns:
        ``(centroids, distortion)`` of the best-scoring run, as returned by
        ``kmeans.scipy_kmeans``.  With ``iterations == 0`` no run takes place
        and the placeholder ``(0, 0)`` is returned.
    """
    feature_holder = featurevector.feature_holder(
        filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    # Per-run scores; kept in an array so they can be plotted when debugging.
    j_measures = np.zeros(iterations)
    best_j = None  # stays None until the first run has been scored
    bestCentroids = 0
    bestDistortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # Always accept the first run and replace a NaN incumbent.  The
        # previous start value of 0 meant that the (0, 0) placeholders were
        # returned whenever no score was strictly positive.
        if best_j is None or np.isnan(best_j) or j_measures[i] > best_j:
            best_j = j_measures[i]
            bestCentroids = centroids
            bestDistortion = distortion

    # Debug aid (disabled): plot the score of every run.
    #   plt.close()
    #   fig, (ax1) = plt.subplots(1)
    #   ax1.plot(j_measures)
    #   ax1.set_title("J measures over multiple iterations of k")
    #   ax1.set_xlabel("iterations")
    #   ax1.set_ylabel("J-measure values")
    #   plt.show()
    return bestCentroids, bestDistortion
def Get_Best_Centroids(k, iterations):
    """Return the centroids of the best of ``iterations`` k-means runs.

    "Best" means the highest value returned by ``calcJ`` for the run.  The
    MFCC features are read from FEATURE_VECTOR_FILENAME on every call; each
    run uses ``kmeans.scipy_kmeans`` to cluster and ``kmeans.scipy_vq`` to
    assign the samples to the resulting centroids.

    Args:
        k: forwarded to ``kmeans.scipy_kmeans`` and ``calcJ`` (presumably the
            number of clusters -- confirm against those helpers).
        iterations: number of independent k-means runs to try.

    Returns:
        ``(centroids, distortion)`` of the best-scoring run, as returned by
        ``kmeans.scipy_kmeans``.  With ``iterations == 0`` no run takes place
        and the placeholder ``(0, 0)`` is returned.
    """
    feature_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    # Per-run scores; kept in an array so they can be plotted when debugging.
    j_measures = np.zeros(iterations)
    best_j = None  # stays None until the first run has been scored
    bestCentroids = 0
    bestDistortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # The first run is always taken, and a NaN incumbent is replaced.
        # Comparing against an initial 0 used to leave the (0, 0)
        # placeholders in place when no score was strictly positive.
        if best_j is None or np.isnan(best_j) or j_measures[i] > best_j:
            best_j = j_measures[i]
            bestCentroids = centroids
            bestDistortion = distortion

    # Debug aid (disabled): plot the score of every run.
    #   plt.close()
    #   fig, (ax1) = plt.subplots(1)
    #   ax1.plot(j_measures)
    #   ax1.set_title("J measures over multiple iterations of k")
    #   ax1.set_xlabel("iterations")
    #   ax1.set_ylabel("J-measure values")
    #   plt.show()
    return bestCentroids, bestDistortion
def feature_selection(args):
    """Pick the best clustering for each k in a range and plot the scores.

    k runs over ``range(args.k_min, args.k_max, args.k_hop)``, so ``k_max``
    itself is excluded.  For every k the centroids come from
    ``Get_Best_Centroids(k, 20)``; the samples are then assigned with
    ``kmeans.scipy_vq`` and the assignment is scored with ``calcJ``.  The
    collected ``(k, distortion, dist, J)`` tuples are passed to
    ``plot.plot_feature_selection``.
    """
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    k_start, k_stop, k_step = args.k_min, args.k_max, args.k_hop
    mfcc_frames = holder.get_feature('mfcc')
    sample_count = len(mfcc_frames)
    # Single-argument form so the output is the same under Python 2 and 3.
    print("%s %s" % ("N MFCCS:", sample_count))

    sweep = []
    for k in range(k_start, k_stop, k_step):
        print("Running k-Means with k=%d" % k)
        if sample_count <= k:
            # Only a warning: the run below is still attempted.
            print("WARNING! k is greater than the number of samples!")
        centroids, distortion = Get_Best_Centroids(k, 20)
        labels, sample_dists = kmeans.scipy_vq(mfcc_frames, centroids)
        j_score = calcJ(mfcc_frames, labels, centroids, k)
        sweep.append((k, distortion, sample_dists, j_score))

    plot.plot_feature_selection(k_start, k_stop, k_step, sweep)
def Get_Best_Centroids(k, iterations):
    """Return the centroids of the best of ``iterations`` k-means runs.

    "Best" means the highest value returned by ``calcJ`` for the run.  The
    MFCC features are read from FEATURE_VECTOR_FILENAME on every call; each
    run uses ``kmeans.scipy_kmeans`` to cluster and ``kmeans.scipy_vq`` to
    assign the samples to the resulting centroids.

    Args:
        k: forwarded to ``kmeans.scipy_kmeans`` and ``calcJ`` (presumably the
            number of clusters -- confirm against those helpers).
        iterations: number of independent k-means runs to try.

    Returns:
        ``(centroids, distortion)`` of the best-scoring run, as returned by
        ``kmeans.scipy_kmeans``.  With ``iterations == 0`` no run takes place
        and the placeholder ``(0, 0)`` is returned.
    """
    # NOTE(review): this banner mentions "multiple k's" although the function
    # works on a single k; the text is kept verbatim.
    print("Feature Analysis/Clustering Mode - feature selection from multiple k's")
    feature_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    mfccs = feature_holder.get_feature('mfcc')

    j_measures = np.zeros(iterations)
    best_j = None  # stays None until the first run has been scored
    bestCentroids = 0
    bestDistortion = 0
    for i in range(iterations):
        centroids, distortion = kmeans.scipy_kmeans(mfccs, k)
        classes, _ = kmeans.scipy_vq(mfccs, centroids)
        j_measures[i] = calcJ(mfccs, classes, centroids, k)
        # The first run is always taken, and a NaN incumbent is replaced.
        # Comparing against an initial 0 used to leave the (0, 0)
        # placeholders in place when no score was strictly positive.
        if best_j is None or np.isnan(best_j) or j_measures[i] > best_j:
            best_j = j_measures[i]
            bestCentroids = centroids
            bestDistortion = distortion
    return bestCentroids, bestDistortion
def clustering(args):
    """Run one clustering pass for ``args.k`` and visualise it.

    Loads the MFCC and sone feature holders, obtains centroids from
    ``Get_Best_Centroids(k, 1)`` and assigns every MFCC sample to a centroid
    with ``kmeans.scipy_vq``.  ``args.plot_segments`` and
    ``args.write_audio_results`` switch on the waveform plot and the audio
    output respectively; ``plot.plot`` is always called at the end.
    """
    print("Feature Analysis/Clustering Mode: single k")
    mfcc_holder = featurevector.feature_holder(filename=FEATURE_VECTOR_FILENAME)
    sone_holder = featurevector.feature_holder(filename=SONE_VECTOR_FILENAME)
    cluster_count = args.k

    print(mfcc_holder)
    mfcc_data = mfcc_holder.get_feature('mfcc')
    print(sone_holder)
    sone_data = sone_holder.get_feature('sones')

    chosen_centroids, run_distortion = Get_Best_Centroids(cluster_count, 1)
    print("Distortion for this run: %0.3f" % run_distortion)
    assignments, _ = kmeans.scipy_vq(mfcc_data, chosen_centroids)

    # Distance matrix between the class centroids.
    inter_class = mir_utils.GetSquareDistanceMatrix(chosen_centroids)
    onsets = mfcc_holder.get_event_start_indecies()

    # Optional outputs, selected on the command line.
    if args.plot_segments:
        PlotWaveformWClasses(cluster_count, mfcc_holder, assignments)
    if args.write_audio_results:
        WriteAudioFromClasses(cluster_count, mfcc_holder, assignments)

    plot.plot(mfcc_data, sone_data, onsets, chosen_centroids, inter_class,
              assignments)