# Load the previously saved model outputs from their .npy files.
all_reports = np.load('all_reports.npy')
components = np.load('components.npy')
labels = np.load('labels2.npy')
all_tests = np.load('all_tests.npy')
all_predicts = np.load('all_predicts.npy')
all_importances = np.load('all_importances.npy')
X_unscaled = np.load('X_unscaled.npy')
X_unscaled = normalize(X_unscaled, axis=0)

# Aggregate the importances from each of the KFolds: sum over the leading
# axis and divide by that axis' length.
all_importances = np.sum(all_importances, axis=0) / all_importances.shape[0]

# It is convenient to work with the pandas data frame for some plots.
modeler = Modeler()
df = modeler.df

# Get feature mask data and labels so we know what to plot.
# The second-to-last entry is kept separately as the frequency bins; the
# last entry is passed on its own to plot_feature_importance below.
all_masks = modeler.extract_frequencies_and_indeces()
frequency_bins = all_masks[-2]

# Start plotting.
make_cluster_purity_plots(components, labels)
conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts)
make_acc_prec_rec_plots(conf_mat)

# The two '$...$' labels contain a literal backslash-space. Raw strings keep
# exactly the same characters without relying on Python passing the
# unrecognised escape "\ " through unchanged (newer interpreters warn on it).
mask_labels = [
    'Energy',
    'Beat Strength',
    '<Beat Separation>',
    r'$Med(Beat\ \ Separation)$',
    r'$Std(Beat\ \ Separation)$',
    'ZCR data',
    'Total energy',
]
features_of_interest = plot_feature_importance(
    all_importances, all_masks[:-2], mask_labels, all_masks[-1])
# Conduct feature generation using WaveRead; this takes several hours, so it
# only runs when the flag below is switched on by hand.
file_convert = False
if file_convert:
    reader = WaveRead(60000, .05)
    reader.convert_all()

# This creates the supervised and unsupervised models.
conduct_model = True
if conduct_model:
    modeler = Modeler()
    # Run supervised/unsupervised models here.
    #
    # OUTPUTS BELOW:
    #   components      = 1-d numpy array of kmeans cluster labels
    #   song_names      = 1-d numpy array of song names
    #   song_labels     = 1-d numpy array of genre labels
    #   components2     = 1-d numpy array of kmeans/nmf cluster labels
    #   all_reports     = list of confusion matrices (2-d numpy arrays)
    #                     from each KFold output
    #   all_tests       = list of actual labels (1-d numpy arrays) from
    #                     each KFold test
    #   all_predicts    = list of predicted labels (1-d numpy arrays) from
    #                     each KFold test
    #   all_importances = list of feature importances for each KFold test
    (components, song_names, song_labels,
     components2, all_reports,
     all_tests, all_predicts, all_importances) = modeler.run_all_models()
    # Parenthesised form prints the same text under the Python 2 print
    # statement and is also valid as a Python 3 function call.
    print("KMEANS")
'''Load appropriate data from npy's''' all_reports = np.load('all_reports.npy') components = np.load('components.npy') labels = np.load('labels2.npy') all_tests = np.load('all_tests.npy') all_predicts = np.load('all_predicts.npy') all_importances = np.load('all_importances.npy') X_unscaled = np.load('X_unscaled.npy') X_unscaled = normalize(X_unscaled, axis = 0) '''aggregate the importances from each of the KFolds''' all_importances = np.sum(all_importances, axis = 0) / all_importances.shape[0] ''' It is convenient to work with the pandas data frame for some plots ''' modeler = Modeler() df = modeler.df ''' Get feature mask data and labels so we know what to plot ''' all_masks = modeler.extract_frequencies_and_indeces() frequency_bins = all_masks[-2] '''Start plotting''' make_cluster_purity_plots(components, labels) conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts) make_acc_prec_rec_plots(conf_mat) mask_labels = ['Energy', 'Beat Strength',\ '<Beat Separation>', '$Med(Beat\ \ Separation)$',\ '$Std(Beat\ \ Separation)$', 'ZCR data', 'Total energy'] features_of_interest =\