def get_evaluation_report(groundTruth, predictResult):
    """Score predictions against the true labels and log the result.

    Args:
        groundTruth: the reference labels.
        predictResult: the predicted labels, aligned with ``groundTruth``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)`` as produced by
        ``accuracy_score``, ``precision_score``, ``recall_score`` and
        ``f1_score``.
    """
    # Computed in the same order as they are reported and returned.
    scores = (
        accuracy_score(groundTruth, predictResult),
        precision_score(groundTruth, predictResult),
        recall_score(groundTruth, predictResult),
        f1_score(groundTruth, predictResult),
    )
    log_the_string('acc:%.2f,pre:%.2f,rec:%.2f,f1:%.2f' % scores)
    return scores
def save_dataset_2_h5(dataX, labelY, dataset_path='dataset.h5'):
    """Write the feature matrix and labels to an HDF5 file.

    The file is opened in ``'w'`` mode and receives two datasets named
    ``'dataX'`` and ``'labelY'``.

    Args:
        dataX: data stored under the ``'dataX'`` key.
        labelY: data stored under the ``'labelY'`` key.
        dataset_path: destination file path (default ``'dataset.h5'``).

    Returns:
        Always ``0``.
    """
    log_the_string('now is saving to h5')
    # The context manager closes the file even if create_dataset raises,
    # so a failed write does not leave the handle open.
    with h5py.File(dataset_path, 'w') as dataset_file:
        dataset_file.create_dataset('dataX', data=dataX)
        dataset_file.create_dataset('labelY', data=labelY)
    return 0
def use_gaussianNB(dataX, lableY):
    """Return a GaussianNB classifier, loading it from disk when cached.

    If ``gnb.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``GaussianNB`` is fitted on the
    given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'gnb.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use gaussianNB and train it save it...')
    classifier = GaussianNB().fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_nearesrNeighbors(dataX, lableY):
    """Return a NearestCentroid classifier, loading it from disk when cached.

    If ``nn.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``NearestCentroid`` is fitted on
    the given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'nn.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use nearesrNeighbors and train it save it...')
    classifier = NearestCentroid().fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_SGD(dataX, lableY):
    """Return an SGDClassifier, loading it from disk when cached.

    If ``sgd.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``SGDClassifier`` with hinge loss
    and an L2 penalty is fitted on the given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'sgd.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use SGD and train it save it...')
    classifier = SGDClassifier(loss="hinge", penalty="l2").fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_SVM(dataX, lableY):
    """Return a LinearSVC classifier, loading it from disk when cached.

    If ``svm.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``LinearSVC`` is fitted on the
    given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'svm.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use SVM and train it save it...')
    classifier = LinearSVC().fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_decision_classify_tree(dataX, lableY):
    """Return a decision-tree classifier, loading it from disk when cached.

    If ``dt.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``tree.DecisionTreeClassifier``
    is fitted on the given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'dt.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use decision tree and train it save it...')
    classifier = tree.DecisionTreeClassifier().fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_MLP(dataX, lableY):
    """Return an MLPClassifier, loading it from disk when cached.

    If ``mlp.model`` already exists it is loaded with ``joblib`` and the
    arguments are not used. Otherwise a new ``MLPClassifier`` (lbfgs solver,
    hidden layers of 5 and 2 units, ``random_state=1``) is fitted on the
    given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'mlp.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use MLP and train it save it...')
    untrained = MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(5, 2),
        random_state=1,
    )
    classifier = untrained.fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
def use_randomForest(dataX, lableY):
    """Return a RandomForestClassifier, loading it from disk when cached.

    If ``randomForest.model`` already exists it is loaded with ``joblib`` and
    the arguments are not used. Otherwise a new ``RandomForestClassifier``
    (10 estimators, unlimited depth, ``random_state=0``) is fitted on the
    given data and dumped to that file.

    Args:
        dataX: training samples passed to ``fit``.
        lableY: training labels passed to ``fit``.

    Returns:
        The loaded or freshly fitted classifier.
    """
    model_file = 'randomForest.model'
    if os.path.isfile(model_file):
        # Cached model found: reuse it as-is.
        return joblib.load(model_file)

    log_the_string('use randomForest and train it save it...')
    untrained = RandomForestClassifier(
        n_estimators=10,
        max_depth=None,
        min_samples_split=2,
        random_state=0,
    )
    classifier = untrained.fit(dataX, lableY)
    joblib.dump(classifier, model_file)
    return classifier
# Script section: load the dataset, obtain seven classifiers, predict on a
# held-out split and report metrics for two single models and a vote.
dataX, labelY = load_dataset_from_h5()
# Alternative data source, currently disabled:
# dataX, labelY = batch_get_mfcc()
print('load dataX shape:', dataX.shape)

# Hold out 20% of the samples; no random_state is passed to the splitter.
X_train, X_test, y_train, y_test = train_test_split(
    dataX, labelY, test_size=0.2
)

# Each helper either fits a new model or loads one saved on disk.
dctModel = use_decision_classify_tree(X_train, y_train)
svmModel = use_SVM(X_train, y_train)
sgdModel = use_SGD(X_train, y_train)
nearModle = use_nearesrNeighbors(X_train, y_train)
gaussianNB_Model = use_gaussianNB(X_train, y_train)
randomForestModel = use_randomForest(X_train, y_train)
mlpModel = use_MLP(X_train, y_train)

# Predict with every model, in the same order the models were obtained.
(
    dctPredict,
    svmPredict,
    sgdPredict,
    nearPredict,
    gaussianNB_Predict,
    randomForestPredict,
    mlpPredict,
) = (
    fitted.predict(X_test)
    for fitted in (
        dctModel,
        svmModel,
        sgdModel,
        nearModle,
        gaussianNB_Model,
        randomForestModel,
        mlpModel,
    )
)

# Element-wise sum of the seven prediction vectors; a sample is voted 1 when
# the sum exceeds 3.
# NOTE(review): this presumes the labels are 0/1 -- confirm against the data.
final_vote_res = (
    dctPredict
    + svmPredict
    + sgdPredict
    + nearPredict
    + gaussianNB_Predict
    + randomForestPredict
    + mlpPredict
)
final_vote_res_0_1 = [int(item > 3) for item in final_vote_res]

# Only the decision tree, the SVM and the combined vote are reported.
get_evaluation_report(y_test, dctPredict)
get_evaluation_report(y_test, svmPredict)
get_evaluation_report(y_test, final_vote_res_0_1)

# `start` is not set in this section; it is expected to exist already.
end = time()
log_the_string('it takes %.2f s' % (end - start))
def load_dataset_from_h5(dataset_path='dataset.h5'):
    """Read the ``'dataX'`` and ``'labelY'`` datasets from an HDF5 file.

    Args:
        dataset_path: path of the HDF5 file to read (default
            ``'dataset.h5'``).

    Returns:
        A ``(dataX, labelY)`` tuple holding the full contents of the two
        datasets.
    """
    log_the_string('now is loading from h5')
    # The `[:]` slices copy the data into memory, so the file can be closed
    # before returning; the original left the handle open.
    with h5py.File(dataset_path, 'r') as dataset_file:
        dataX = dataset_file['dataX'][:]
        labelY = dataset_file['labelY'][:]
    return dataX, labelY
def get_mfcc(wavFilePath):
    """Compute MFCC features for one wav file.

    The samples returned by ``wavread`` are divided by ``2**15`` and then
    passed, together with the sample rate, to ``mfcc``.

    Args:
        wavFilePath: path of the wav file; it is passed to ``wavread`` and
            included in the log message.

    Returns:
        The value returned by ``mfcc(audioData, sampleRate)``.
    """
    log_the_string('now is processing %s...' % wavFilePath)
    sampleRate, audioData = wavread(wavFilePath)
    # Scale all samples in one vectorized operation instead of a Python-level
    # loop. The explicit float64 conversion makes this a true division
    # whatever integer promotion rules the interpreter or NumPy applies.
    # NOTE(review): the 2**15 divisor presumes 16-bit PCM input -- confirm.
    audioData = numpy.asarray(audioData, dtype=numpy.float64) / 2 ** 15
    return mfcc(audioData, sampleRate)