It is convenient to work with the pandas data frame for some plots ''' modeler = Modeler() df = modeler.df ''' Get feature mask data and labels so we know what to plot ''' all_masks = modeler.extract_frequencies_and_indeces() frequency_bins = all_masks[-2] '''Start plotting''' make_cluster_purity_plots(components, labels) conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts) make_acc_prec_rec_plots(conf_mat) mask_labels = ['Energy', 'Beat Strength',\ '<Beat Separation>', '$Med(Beat\ \ Separation)$',\ '$Std(Beat\ \ Separation)$', 'ZCR data', 'Total energy'] features_of_interest =\ plot_feature_importance(all_importances, all_masks[:-2], mask_labels, all_masks[-1]) '''We plot the top 5 features of interest in our scatter plots''' make_scatter_plots(features_of_interest[0:5], df) '''Make more detailed mask labels''' mask_labels = ['Energy (scaled to total song energy)', 'Normalized Beat Strength',\ 'Normalized <Beat Separation>', '$Med(Beat\ \ Separation)$',\ '$Std(Beat\ \ Separation)$', 'ZCR data', 'Total energy'] make_feature_vs_frequency_plots(all_masks, mask_labels, X_unscaled, frequency_bins, labels) reader = WaveRead() sample_rate, recording = reader.get_recording('Tupac- Smile.wav') plot_signal_vs_time_data(recording, sample_rate)
for each song; labels = list of corresponding genre labels (strings) for each song ''' foo = zip(components, labels) struct_array = np.array([np.array([element[0], element[1]]) for element in foo]) for name in set(struct_array[:,1]): mask = struct_array[:,1] == name print name print Counter(struct_array[mask][:,0]) ''' conduct feature generation using waveread, this takes several hours ''' file_convert = False if file_convert: reader = WaveRead(60000, .05) reader.convert_all() '''this creates the supervised and unsupervised models''' conduct_model = True if conduct_model: modeler = Modeler() '''run supervised/unsupervised models here''' ''' OUTPUTS BELOW: components = 1-d numpy array of kmeans cluster labels; song_names = 1-d numpy array of song names; song_labels =1-d numpy array of genre labels ; components2 = 1-d numpy array of kmeans/nmf cluster labels; all_reports = list of confusion matrices (2-d numpy arrays) from each KFold output;
the pandas data frame for some plots ''' modeler = Modeler() df = modeler.df ''' Get feature mask data and labels so we know what to plot ''' all_masks = modeler.extract_frequencies_and_indeces() frequency_bins = all_masks[-2] '''Start plotting''' make_cluster_purity_plots(components, labels) conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts) make_acc_prec_rec_plots(conf_mat) mask_labels = ['Energy', 'Beat Strength',\ '<Beat Separation>', '$Med(Beat\ \ Separation)$',\ '$Std(Beat\ \ Separation)$', 'ZCR data', 'Total energy'] features_of_interest =\ plot_feature_importance(all_importances, all_masks[:-2], mask_labels, all_masks[-1]) '''We plot the top 5 features of interest in our scatter plots''' make_scatter_plots(features_of_interest[0:5], df) '''Make more detailed mask labels''' mask_labels = ['Energy (scaled to total song energy)', 'Normalized Beat Strength',\ 'Normalized <Beat Separation>', '$Med(Beat\ \ Separation)$',\ '$Std(Beat\ \ Separation)$', 'ZCR data', 'Total energy'] make_feature_vs_frequency_plots(all_masks, mask_labels, X_unscaled, frequency_bins, labels) reader = WaveRead() sample_rate, recording = reader.get_recording('Tupac- Smile.wav') plot_signal_vs_time_data(recording, sample_rate)