Esempio n. 1
0
def get_evaluation_report(groundTruth, predictResult):
    """Score predictions against the ground truth and log the result.

    Calls ``accuracy_score``, ``precision_score``, ``recall_score`` and
    ``f1_score`` (in that order) on the two label sequences, logs the four
    values with two decimals, and returns them.

    Args:
        groundTruth: the reference labels.
        predictResult: the predicted labels, aligned with ``groundTruth``.

    Returns:
        The tuple ``(accuracy, precision, recall, f1)``.
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    scores = tuple(scorer(groundTruth, predictResult) for scorer in scorers)
    log_the_string('acc:%.2f,pre:%.2f,rec:%.2f,f1:%.2f' % scores)
    return scores
Esempio n. 2
0
def save_dataset_2_h5(dataX, labelY, dataset_path='dataset.h5'):
    """Write the features and labels to an HDF5 file.

    The file at ``dataset_path`` is opened in ``'w'`` mode, so an existing
    file is overwritten.  Two datasets are created: ``'dataX'`` and
    ``'labelY'``.

    Args:
        dataX: data stored under the ``'dataX'`` key.
        labelY: data stored under the ``'labelY'`` key.
        dataset_path: destination file path.

    Returns:
        Always ``0``.
    """
    log_the_string('now is saving to h5')
    # The context manager closes the file even if create_dataset raises,
    # so a failed write no longer leaks the open handle.
    with h5py.File(dataset_path, 'w') as dataset_file:
        dataset_file.create_dataset('dataX', data=dataX)
        dataset_file.create_dataset('labelY', data=labelY)
    return 0
Esempio n. 3
0
def use_gaussianNB(dataX, lableY):
    """Return a GaussianNB classifier, reusing the one saved in 'gnb.model'.

    When 'gnb.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier is fitted on them, dumped to that path and returned.
    """
    model_file = 'gnb.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use gaussianNB and train it save it...')
    fitted = GaussianNB().fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 4
0
def use_nearesrNeighbors(dataX, lableY):
    """Return a NearestCentroid classifier, reusing the one in 'nn.model'.

    When 'nn.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier is fitted on them, dumped to that path and returned.
    """
    model_file = 'nn.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use nearesrNeighbors and train it save it...')
    fitted = NearestCentroid().fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 5
0
def use_SGD(dataX, lableY):
    """Return an SGDClassifier (hinge loss, l2 penalty), cached in 'sgd.model'.

    When 'sgd.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier is fitted on them, dumped to that path and returned.
    """
    model_file = 'sgd.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use SGD and train it save it...')
    estimator = SGDClassifier(loss="hinge", penalty="l2")
    fitted = estimator.fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 6
0
def use_SVM(dataX, lableY):
    """Return a LinearSVC classifier, reusing the one saved in 'svm.model'.

    When 'svm.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier is fitted on them, dumped to that path and returned.
    """
    model_file = 'svm.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use SVM and train it save it...')
    fitted = LinearSVC().fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 7
0
def use_decision_classify_tree(dataX, lableY):
    """Return a DecisionTreeClassifier, reusing the one saved in 'dt.model'.

    When 'dt.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier is fitted on them, dumped to that path and returned.
    """
    model_file = 'dt.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use decision tree and train it save it...')
    fitted = tree.DecisionTreeClassifier().fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 8
0
def use_MLP(dataX, lableY):
    """Return an MLPClassifier, reusing the one saved in 'mlp.model'.

    When 'mlp.model' exists it is loaded with joblib and returned as is;
    ``dataX`` and ``lableY`` are not used in that case.  Otherwise a new
    classifier (lbfgs solver, alpha 1e-5, hidden layers of 5 and 2 units,
    random_state 1) is fitted on them, dumped to that path and returned.
    """
    model_file = 'mlp.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use MLP and train it save it...')
    settings = {
        'solver': 'lbfgs',
        'alpha': 1e-5,
        'hidden_layer_sizes': (5, 2),
        'random_state': 1,
    }
    fitted = MLPClassifier(**settings).fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 9
0
def use_randomForest(dataX, lableY):
    """Return a RandomForestClassifier, cached in 'randomForest.model'.

    When 'randomForest.model' exists it is loaded with joblib and returned
    as is; ``dataX`` and ``lableY`` are not used in that case.  Otherwise a
    new classifier (10 estimators, unlimited depth, min_samples_split 2,
    random_state 0) is fitted on them, dumped to that path and returned.
    """
    model_file = 'randomForest.model'
    if os.path.isfile(model_file):
        # A previously persisted model takes precedence over retraining.
        return joblib.load(model_file)

    log_the_string('use randomForest and train it save it...')
    settings = {
        'n_estimators': 10,
        'max_depth': None,
        'min_samples_split': 2,
        'random_state': 0,
    }
    fitted = RandomForestClassifier(**settings).fit(dataX, lableY)
    joblib.dump(fitted, model_file)
    return fitted
Esempio n. 10
0
    # Load the stored features and labels from the HDF5 dataset file.
    dataX, labelY = load_dataset_from_h5()  # dataX, labelY = batch_get_mfcc()
    print('load dataX shape:', dataX.shape)
    # Keep 20% of the samples aside for evaluation (no random_state is passed).
    X_train, X_test, y_train, y_test = train_test_split(dataX,
                                                        labelY,
                                                        test_size=0.2)
    # Get one classifier per algorithm from the use_* helpers.
    # NOTE(review): the helpers return a model file from disk when one exists,
    # in which case X_train/y_train are not used for that model — confirm that
    # a stale model evaluated on this fresh split is acceptable.
    dctModel = use_decision_classify_tree(X_train, y_train)
    svmModel = use_SVM(X_train, y_train)
    sgdModel = use_SGD(X_train, y_train)
    nearModle = use_nearesrNeighbors(X_train, y_train)
    gaussianNB_Model = use_gaussianNB(X_train, y_train)
    randomForestModel = use_randomForest(X_train, y_train)
    mlpModel = use_MLP(X_train, y_train)

    # Predict the held-out samples with each of the seven classifiers.
    dctPredict = dctModel.predict(X_test)
    svmPredict = svmModel.predict(X_test)
    sgdPredict = sgdModel.predict(X_test)
    nearPredict = nearModle.predict(X_test)
    gaussianNB_Predict = gaussianNB_Model.predict(X_test)
    randomForestPredict = randomForestModel.predict(X_test)
    mlpPredict = mlpModel.predict(X_test)

    # Sum the seven prediction vectors to count votes per sample.
    # Assumes the labels are numeric 0/1 so the sum is a vote count — TODO confirm.
    final_vote_res = dctPredict + svmPredict + sgdPredict + nearPredict + gaussianNB_Predict + randomForestPredict + mlpPredict

    # More than 3 of the 7 votes (i.e. at least 4) yields class 1, otherwise 0.
    final_vote_res_0_1 = [1 if item > 3 else 0 for item in final_vote_res]

    # Only the decision tree, the SVM and the voted result are reported.
    get_evaluation_report(y_test, dctPredict)
    get_evaluation_report(y_test, svmPredict)
    get_evaluation_report(y_test, final_vote_res_0_1)
    # `start` is not assigned in the visible lines; presumably it is set with
    # time() before this fragment — verify.
    end = time()
    log_the_string('it takes %.2f s' % (end - start))
Esempio n. 11
0
def load_dataset_from_h5(dataset_path='dataset.h5'):
    """Read the ``'dataX'`` and ``'labelY'`` datasets from an HDF5 file.

    Args:
        dataset_path: path of the HDF5 file, opened read-only.

    Returns:
        The tuple ``(dataX, labelY)`` holding the full contents of both
        datasets.
    """
    log_the_string('now is loading from h5')
    # The `[:]` slices copy each dataset into memory, so the values stay
    # usable after the context manager closes the file.  Previously the
    # handle was never closed.
    with h5py.File(dataset_path, 'r') as dataset_file:
        dataX = dataset_file['dataX'][:]
        labelY = dataset_file['labelY'][:]
    return dataX, labelY
Esempio n. 12
0
def get_mfcc(wavFilePath):
    """Read a WAV file and return the ``mfcc`` features of its scaled samples.

    Args:
        wavFilePath: path of the WAV file passed to ``wavread``.

    Returns:
        Whatever ``mfcc`` returns for the scaled samples and the sample rate.
    """
    log_the_string('now is processing %s...' % wavFilePath)
    rate, samples = wavread(wavFilePath)
    # Divide every sample by 2**15 — presumably to bring 16-bit PCM values
    # into the [-1, 1) range; confirm the input sample format.
    full_scale = 2**15
    scaled = numpy.array([sample / full_scale for sample in samples])
    return mfcc(scaled, rate)