                     label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(list(pr_scores.values())).flatten()
    summary = (np.mean(scores), np.std(scores),
               np.mean(all_pr_scores), np.std(all_pr_scores))
    # print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    # save the trained model to disk
    if outModelName:
        joblib.dump(clf, outModelName)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


if __name__ == "__main__":
    start = timeit.default_timer()
    print()
    print(" Starting classification \n")
    print(" Classification running ... \n")

    X, y = read_ceps(genre_list)
    train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True)

    # Average the per-fold confusion matrices and normalise each column.
    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)

    print(" Classification finished \n")
    stop = timeit.default_timer()
    print(" Total time taken (s) = ", (stop - start))

    print("\n Plotting confusion matrix ... \n")
    plot_confusion_matrix(cm_norm, genre_list, "ceps",
                          "CEPS classifier - Confusion matrix")
    print(" All Done\n")
    print(" See plots in 'graphs' directory \n")
desc = "%s %s" % (name, genre_list[label]) plot_roc(roc_scores[label][median], desc, tprs[label][median], fprs[label][median], label='%s vs rest' % genre_list[label]) all_pr_scores = np.asarray(pr_scores.values()).flatten() summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores)) print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary) return np.mean(train_errors), np.mean(test_errors), np.asarray(cms) def create_model(): from sklearn.linear_model.logistic import LogisticRegression clf = LogisticRegression() return clf if __name__ == "__main__": X, y = read_ceps(genre_list) train_avg, test_avg, cms = train_model( create_model, X, y, "Log Reg CEPS", plot=True) cm_avg = np.mean(cms, axis=0) cm_norm = cm_avg / np.sum(cm_avg, axis=0) plot_confusion_matrix(cm_norm, genre_list, "ceps", "Confusion matrix of a CEPS based classifier")
import timeit

import joblib  # sklearn.externals.joblib is deprecated
from sklearn import metrics, model_selection, preprocessing, svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import confusion_matrix, make_scorer, recall_score
from sklearn.model_selection import ShuffleSplit, cross_val_predict, cross_validate

from utils import ENGLISH_GENRE_LIST, ENGLISH_GENRE_DIR, TEST_DIR
from utils import plot_confusion_matrix, plot_roc_curves
from ceps import read_ceps, read_ceps_test

genre_list = ENGLISH_GENRE_LIST
GENRE_DIR = ENGLISH_GENRE_DIR

start = timeit.default_timer()
print("\n")
print(" Starting classification \n")
print(" Classification running ... \n")

# Load cepstral features and labels for the English genres.
X, y = read_ceps(genre_list, GENRE_DIR)
print(" X is ", X, "len of X is ", len(X), X.shape)
print(" y is ", y)

# Hold out 20% of the data and standardise using the training split only.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.20, random_state=0)
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)

clf = LDA().fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
print(clf.score(X_test_transformed, y_test))

# 10-fold cross-validated predictions on the full data set.
predicted = cross_val_predict(clf, X, y, cv=10)
joblib.dump(clf, 'saved_model/englishLDAmodel.pkl')
print(metrics.accuracy_score(y, predicted))

scoring = {'prec_macro': 'precision_macro',
           'rec_macro': make_scorer(recall_score, average='macro')}
scores = cross_validate(clf, X, y, scoring=scoring, cv=5,
                        return_train_score=True)
print(sorted(scores.keys()))
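# A minimal sketch (not part of the original script) of how the cross_validate
# results above can be reported and the persisted LDA model reloaded; the
# dictionary keys follow scikit-learn's 'train_'/'test_' prefix convention for
# the scoring names defined above.
print(" LDA precision (macro): %.3f +/- %.3f"
      % (scores['test_prec_macro'].mean(), scores['test_prec_macro'].std()))
print(" LDA recall    (macro): %.3f +/- %.3f"
      % (scores['test_rec_macro'].mean(), scores['test_rec_macro'].std()))

# Reload the model saved above and score it on the held-out split again.
reloaded = joblib.load('saved_model/englishLDAmodel.pkl')
print(" Reloaded model accuracy: %.3f"
      % reloaded.score(X_test_transformed, y_test))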
    _dst = train_model(create_model, X, y, ceps_fn)
    _cmd = 'mv "%s" "%s/%s"' % (src_file, DEST_DIR, _dst)
    print(">>>[%s]" % _cmd)
    os.system(_cmd)


if __name__ == "__main__":
    if not os.path.isdir(SRC_DIR):
        print("\n\n\tError: %s does not exist\n\n" % SRC_DIR)
        sys.exit()

    # Load the training samples.
    X, y = ceps.read_ceps(genre_list)

    # Create the destination directory tree, one sub-directory per genre.
    if not os.path.isdir(DEST_DIR):
        os.system('mkdir "%s"' % DEST_DIR)
    for _dir in genre_list:
        __dir = os.path.join(DEST_DIR, _dir)
        if not os.path.isdir(__dir):
            os.system('mkdir "%s"' % __dir)

    # Resample every supported audio file to 22050 Hz mono with sox.
    d_list = getDir(SRC_DIR)
    for _file in d_list:
        if _file[1].lower() in ("wav", "flac", "mp3"):
            cmd = 'sox "%s" -r 22050 -c 1 "%s/temp.wav"' % (_file[0], TEMP_DIR)
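# getDir() is called above but not defined in this fragment; a minimal sketch,
# assuming it is expected to return (full_path, extension) pairs for every file
# found under the given directory (extension without the leading dot):
def getDir(root):
    entries = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            ext = os.path.splitext(name)[1].lstrip(".")
            entries.append((os.path.join(dirpath, name), ext))
    return entries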
    def __init__(self):
        super(CepsSoundClassify, self).__init__()
        # Load cepstral features and labels for all genres once, at construction.
        self._X, self._Y = read_ceps(GENRE_LIST)
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from utils import ENGLISH_GENRE_LIST, ENGLISH_GENRE_DIR
from ceps import read_ceps

GENRE_LIST = ENGLISH_GENRE_LIST
GENRE_DIR = ENGLISH_GENRE_DIR

# load dataset
X, Y = read_ceps(GENRE_LIST, GENRE_DIR)
print(X)

# prepare configuration for cross validation test harness
seed = 7

# prepare models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
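# The loop that fills `results`/`names` is cut off in this fragment; a minimal
# sketch, assuming the usual 10-fold cross_val_score comparison followed by a
# box plot of the per-fold accuracies (kfold, cv_results and fig are
# illustrative names, not from the original script):
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold,
                                                 scoring=scoring)
    results.append(cv_results)
    names.append(name)
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))

# Box plot comparing the cross-validated accuracy of each model.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()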