コード例 #1
0
# Load the input arrays from .npy files in the current working directory.
all_reports = np.load('all_reports.npy')
components = np.load('components.npy')
labels = np.load('labels2.npy')
all_tests = np.load('all_tests.npy')
all_predicts = np.load('all_predicts.npy')
all_importances = np.load('all_importances.npy')
# normalize() is defined outside this snippet (presumably
# sklearn.preprocessing.normalize -- TODO confirm); it is applied along axis 0.
X_unscaled = normalize(np.load('X_unscaled.npy'), axis=0)

# Aggregate the importances from each of the KFolds by averaging over the
# first axis (assumed to be the fold axis -- TODO confirm).  np.mean always
# performs true division, unlike sum / shape[0] on integer data in Python 2.
all_importances = np.mean(all_importances, axis=0)

# It is convenient to work with the pandas data frame for some plots.
modeler = Modeler()
df = modeler.df

# Get feature mask data and labels so we know what to plot.
all_masks = modeler.extract_frequencies_and_indeces()
frequency_bins = all_masks[-2]

# Start plotting.
make_cluster_purity_plots(components, labels)
conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts)
make_acc_prec_rec_plots(conf_mat)

# The '$...$' labels contain backslash-space sequences; raw strings keep those
# backslashes literally instead of relying on Python leaving an unrecognized
# escape sequence untouched (deprecated behaviour).
mask_labels = [
    'Energy',
    'Beat Strength',
    '<Beat Separation>',
    r'$Med(Beat\ \ Separation)$',
    r'$Std(Beat\ \ Separation)$',
    'ZCR data',
    'Total energy',
]
# The last two entries of all_masks are handled separately: [-2] was read as
# frequency_bins above and [-1] is passed as its own argument here.
features_of_interest = plot_feature_importance(
    all_importances, all_masks[:-2], mask_labels, all_masks[-1])
コード例 #2
0
    
# Conduct feature generation using WaveRead.  This takes several hours, so it
# only runs when file_convert is switched on.
file_convert = False
if file_convert:
    reader = WaveRead(60000, 0.05)
    reader.convert_all()



# This creates the supervised and unsupervised models.
conduct_model = True
if conduct_model:
    modeler = Modeler()
    # Run the supervised/unsupervised models here.
    #
    # Outputs of run_all_models(), in order:
    #   components      - 1-d numpy array of kmeans cluster labels
    #   song_names      - 1-d numpy array of song names
    #   song_labels     - 1-d numpy array of genre labels
    #   components2     - 1-d numpy array of kmeans/nmf cluster labels
    #   all_reports     - list of confusion matrices (2-d numpy arrays), one
    #                     per KFold output
    #   all_tests       - list of actual labels (1-d numpy arrays), one per
    #                     KFold test
    #   all_predicts    - list of predicted labels (1-d numpy arrays), one per
    #                     KFold test
    #   all_importances - list of feature importances, one per KFold test
    (components, song_names, song_labels,
     components2, all_reports,
     all_tests, all_predicts, all_importances) = modeler.run_all_models()
    # Parenthesized form prints the same text on Python 2 and is valid
    # syntax on Python 3, where the bare print statement is a SyntaxError.
    print("KMEANS")
コード例 #3
0
# Load the input arrays from .npy files in the current working directory.
all_reports = np.load('all_reports.npy')
components = np.load('components.npy')
labels = np.load('labels2.npy')
all_tests = np.load('all_tests.npy')
all_predicts = np.load('all_predicts.npy')
all_importances = np.load('all_importances.npy')
# normalize() is defined outside this snippet (presumably
# sklearn.preprocessing.normalize -- TODO confirm); it is applied along axis 0.
X_unscaled = normalize(np.load('X_unscaled.npy'), axis=0)

# Aggregate the importances from each of the KFolds by averaging over the
# first axis (assumed to be the fold axis -- TODO confirm).  np.mean always
# performs true division, unlike sum / shape[0] on integer data in Python 2.
all_importances = np.mean(all_importances, axis=0)

# It is convenient to work with the pandas data frame for some plots.
modeler = Modeler()
df = modeler.df

# Get feature mask data and labels so we know what to plot.
all_masks = modeler.extract_frequencies_and_indeces()
frequency_bins = all_masks[-2]

# Start plotting.
make_cluster_purity_plots(components, labels)
conf_mat = plot_supervised_confusion_matrix(all_tests, all_predicts)
make_acc_prec_rec_plots(conf_mat)

# The '$...$' labels contain backslash-space sequences; raw strings keep those
# backslashes literally instead of relying on Python leaving an unrecognized
# escape sequence untouched (deprecated behaviour).
mask_labels = [
    'Energy',
    'Beat Strength',
    '<Beat Separation>',
    r'$Med(Beat\ \ Separation)$',
    r'$Std(Beat\ \ Separation)$',
    'ZCR data',
    'Total energy',
]
features_of_interest =\