label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores),
               np.std(all_pr_scores))
    #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    #save the trained model to disk
    if outModelName: joblib.dump(clf, outModelName)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


if __name__ == "__main__":
    # Script entry point: load the CEPS features, train/evaluate the
    # classifier, report the elapsed time and plot the confusion matrix.
    #
    # Every message is emitted as ONE pre-formatted string wrapped in
    # parentheses. That form parses and prints identically as a Python 2
    # print statement and as the Python 3 print function, whereas the bare
    # `print "text"` form is a SyntaxError on Python 3.
    start = timeit.default_timer()
    print("")  # blank line, same output as the former bare `print`
    print(" Starting classification \n")
    print(" Classification running ... \n")
    X, y = read_ceps(genre_list)
    train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True)
    # Average the returned matrices over axis 0, then divide every column
    # by its column total (np.sum(..., axis=0) yields one total per column).
    # NOTE(review): a column whose total is 0 produces NaN/inf here.
    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)
    print(" Classification finished \n")
    stop = timeit.default_timer()
    # The two spaces after '=' reproduce the separator that the original
    # two-item print inserted between the label and the value.
    print(" Total time taken (s) =  %s" % (stop - start))
    print("\n Plotting confusion matrix ... \n")
    plot_confusion_matrix(cm_norm, genre_list, "ceps",
                          "CEPS classifier - Confusion matrix")
    print(" All Done\n")
    print(" See plots in 'graphs' directory \n")
            desc = "%s %s" % (name, genre_list[label])
            plot_roc(roc_scores[label][median], desc, tprs[label][median],
                     fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores),
               np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


def create_model():
    """Build a fresh, unfitted logistic-regression classifier.

    Returns:
        A new ``LogisticRegression`` instance created with scikit-learn's
        default hyper-parameters.
    """
    # Import from the public package path. The ``sklearn.linear_model.logistic``
    # submodule is not part of the public API and is unavailable in newer
    # scikit-learn releases; the public path works on old and new versions.
    from sklearn.linear_model import LogisticRegression

    return LogisticRegression()


if __name__ == "__main__":
    # Load the CEPS data for every entry of genre_list.
    X, y = read_ceps(genre_list)

    # Train and evaluate models produced by the create_model factory;
    # plot=True asks train_model to draw its plots as well.
    train_avg, test_avg, cms = train_model(create_model, X, y,
                                           "Log Reg CEPS", plot=True)

    # Average the returned matrices over axis 0, then scale every column
    # by its own column total.
    cm_avg = np.mean(cms, axis=0)
    column_totals = np.sum(cm_avg, axis=0)
    cm_norm = cm_avg / column_totals

    plot_confusion_matrix(
        cm_norm,
        genre_list,
        "ceps",
        "Confusion matrix of a CEPS based classifier",
    )
from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib
from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics.scorer import make_scorer
from utils import ENGLISH_GENRE_LIST, ENGLISH_GENRE_DIR, TEST_DIR
from utils import plot_confusion_matrix, plot_roc_curves
from ceps import read_ceps, read_ceps_test
# Flat training/evaluation script: load CEPS features, fit a scaled LDA model
# on a hold-out split, persist it, then report cross-validated metrics.
genre_list = ENGLISH_GENRE_LIST
GENRE_DIR=ENGLISH_GENRE_DIR
# Timestamp taken before the work starts; no matching "stop" reading appears
# in the visible part of the script.
start = timeit.default_timer()
print("\n")
print (" Starting classification \n")
print (" Classification running ... \n") 
X, y = read_ceps(genre_list,GENRE_DIR)
# Debug dump of the loaded data (X itself, its length and its shape, then y).
print(" X is " , X, "len of x is ",len(X),X.shape)
print("y is ",y)
# 80/20 hold-out split; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=.20, random_state=0)
# The scaler is fitted on the training part only and then applied to both
# parts, so no statistics from the test part are used for scaling.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
# NOTE(review): LDA is imported outside this excerpt -- presumably an alias of
# LinearDiscriminantAnalysis; confirm.
clf = LDA().fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
# Score of the fitted model on the scaled hold-out part.
print(clf.score(X_test_transformed, y_test))
# NOTE(review): the cross-validated predictions are computed on the raw,
# unscaled X, unlike the hold-out fit above.
predicted = cross_val_predict(clf, X, y, cv=10)
# Persist the classifier fitted on the scaled training part.
# NOTE(review): the fitted scaler is not saved in the visible code, and the
# 'saved_model' directory is assumed to exist already.
joblib.dump(clf, 'saved_model/englishLDAmodel.pkl')

# Accuracy of the 10-fold cross-validated predictions against all labels.
print(metrics.accuracy_score(y, predicted) )
# NOTE(review): the key 'rec_micro' is paired with average='macro', so the
# label and the averaging mode disagree -- confirm which one is intended.
scoring = {'prec_macro': 'precision_macro','rec_micro': make_scorer(recall_score, average='macro')}
scores = cross_validate(clf, X, y, scoring=scoring,cv=5, return_train_score=True)
# NOTE(review): the sorted key list is neither printed nor stored, so this
# line has no visible effect when run as a script.
sorted(scores.keys())                 
        _dst = train_model(create_model, X, y, ceps_fn)

    _cmd = 'mv "%s" "%s/%s"' % (src_file, DEST_DIR, _dst)
    print(">>>[%s]" % _cmd)
    os.system(_cmd)


if __name__ == "__main__":
    # Script entry point: validate the source directory, load the training
    # samples, make sure DEST_DIR and one sub-directory per genre exist, then
    # walk the entries returned by getDir(SRC_DIR).

    if not os.path.isdir(SRC_DIR):
        print("\n\n\tError: %s is not exist\n\n" % SRC_DIR)
        # NOTE(review): sys.exit() without an argument exits with status 0,
        # even though this is an error path.
        sys.exit()

    # The bare string below is a no-op statement used as a comment; it is
    # Chinese for "obtain the training samples".
    """获取 训练样本
    """
    X, y = ceps.read_ceps(genre_list)

    # Directories are created by shelling out to `mkdir`; the exit status of
    # os.system is not checked.
    if not os.path.isdir(DEST_DIR):
        os.system('mkdir "%s"' % DEST_DIR)
    for _dir in genre_list:
        # Path built by "/" concatenation for the existence check ...
        __dir = DEST_DIR + "/" + _dir
        if not os.path.isdir(__dir):
            # ... and rebuilt with os.path.join for the mkdir command.
            __dir = os.path.join(DEST_DIR, _dir)
            _cmd = 'mkdir "%s"' % __dir
            os.system(_cmd)

    d_list = getDir(SRC_DIR)

    # NOTE(review): each entry appears to be a (path, extension) pair, judging
    # by how indexes 0 and 1 are used below -- confirm against getDir.
    for _file in d_list:
        # Only entries whose second field is exactly one of these four
        # spellings are handled (the comparison is case-sensitive).
        if (_file[1] == "wav") or (_file[1] == "Wav") or (_file[1] == "flac") or (_file[1] == "mp3"):
            # Builds a sox command line (-r 22050: sample rate, -c 1: one
            # channel) that writes TEMP_DIR/temp.wav. The command is only
            # assembled here; its execution is not visible in this excerpt.
            cmd = 'sox "%s" -r 22050 -c 1 "%s/temp.wav"' % (_file[0], TEMP_DIR)
 def __init__(self):
     """Initialise the base class, then load the CEPS data for GENRE_LIST.

     The pair returned by ``read_ceps`` is stored on the instance as
     ``_X`` and ``_Y``.
     """
     super(CepsSoundClassify, self).__init__()
     ceps_x, ceps_y = read_ceps(GENRE_LIST)
     self._X = ceps_x
     self._Y = ceps_y
Exemple #6
0
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from utils import ENGLISH_GENRE_LIST,ENGLISH_GENRE_DIR
from sklearn import preprocessing
GENRE_LIST=ENGLISH_GENRE_LIST
GENRE_DIR=ENGLISH_GENRE_DIR
from ceps import read_ceps
# Load the dataset for the configured genre list and directory.

X, Y = read_ceps(GENRE_LIST, GENRE_DIR)
print(X)

# Configuration for the cross-validation test harness.
seed = 7

# Candidate models as (short name, estimator) pairs, each constructed with
# its default arguments.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# Accumulators for evaluating each model in turn, and the metric to use.
results, names = [], []
scoring = 'accuracy'