Example #1
def featureAndTrainRegression(dirName,
                              mtWin,
                              mtStep,
                              stWin,
                              stStep,
                              modelType,
                              modelName,
                              computeBEAT=False):
    '''
    This function is a wrapper for segment-based audio feature extraction and regression model training.
    ARGUMENTS:
        dirName:        path of directory containing the WAV files and Regression CSVs
        mtWin, mtStep:        mid-term window length and step
        stWin, stStep:        short-term window and step
        modelType:        "svm" or "knn" or "randomforest"
        modelName:        name of the model to be saved
    RETURNS:
        None. The resulting regression model and the respective model parameters are saved to files.
    '''
    # STEP A: Feature Extraction:
    [features, _,
     fileNames] = aF.dirsWavFeatureExtraction([dirName],
                                              mtWin,
                                              mtStep,
                                              stWin,
                                              stStep,
                                              computeBEAT=computeBEAT)
    features = features[0]
    fileNames = [ntpath.basename(f) for f in fileNames[0]]

    # Read CSVs:
    CSVs = glob.glob(dirName + os.sep + "*.csv")
    regressionLabels = []
    regressionNames = []
    for c in CSVs:  # for each CSV
        # read filenames from the CSV, map them to "fileNames" and store the respective target values
        curRegressionLabels = numpy.zeros((len(fileNames), ))
        with open(c, 'rb') as csvfile:
            CSVreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in CSVreader:
                if len(row) == 2:
                    if row[0] + ".wav" in fileNames:
                        index = fileNames.index(row[0] + ".wav")
                        curRegressionLabels[index] = float(row[1])
        regressionLabels.append(
            curRegressionLabels
        )  # curRegressionLabels is the list of values for the current regression problem
        regressionNames.append(ntpath.basename(c).replace(
            ".csv", ""))  # regression task name
    if len(features) == 0:
        print "ERROR: No data found in any input folder!"
        return

    numOfFeatures = features.shape[1]

    # TODO: ARFF WRITE????
    # STEP B: Classifier Evaluation and Parameter Selection:
    if modelType == "svm":
        modelParams = numpy.array(
            [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0])
    elif modelType == "randomforest":
        modelParams = numpy.array([5, 10, 25, 50, 100])


#    elif modelType == "knn":
#        modelParams = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]);

    for iRegression, r in enumerate(regressionNames):
        # get optimal classifier parameter:
        print "Regression task " + r
        bestParam = evaluateRegression(features, regressionLabels[iRegression],
                                       100, modelType, modelParams)
        print "Selected params: {0:.5f}".format(bestParam)

        [featuresNorm, MEAN,
         STD] = normalizeFeatures([features])  # normalize features

        # STEP C: Save the model to file
        if modelType == "svm":
            Classifier, _ = trainSVMregression(featuresNorm[0],
                                               regressionLabels[iRegression],
                                               bestParam)
            with open(modelName + "_" + r, 'wb') as fid:  # save to file
                cPickle.dump(Classifier, fid)
            fo = open(modelName + "_" + r + "MEANS", "wb")
            cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            fo.close()
        '''             TODO
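A minimal usage sketch for the wrapper above, assuming it is importable from pyAudioAnalysis.audioTrainTest as in the upstream project; the directory path, window sizes and model name below are hypothetical:

from pyAudioAnalysis import audioTrainTest as aT

# The directory is assumed to hold the WAV files plus one CSV per regression
# target, each row mapping a file name to a target value (values below are
# illustrative assumptions, not part of the original snippet).
aT.featureAndTrainRegression("data/speech_emotion",   # hypothetical data folder
                             1.0, 1.0,                # mid-term window / step (seconds)
                             0.050, 0.050,            # short-term window / step (seconds)
                             "svm",
                             "svmEmotion")            # saved as "svmEmotion_<task>" plus "MEANS"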
Example #2
def featureAndTrain(listOfDirs,
                    mtWin,
                    mtStep,
                    stWin,
                    stStep,
                    classifierType,
                    modelName,
                    computeBEAT=False,
                    perTrain=0.90):
    '''
    This function is a wrapper for segment-based audio feature extraction and classifier training.
    ARGUMENTS:
        listOfDirs:        list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mtWin, mtStep:        mid-term window length and step
        stWin, stStep:        short-term window and step
        classifierType:        "svm" or "knn" or "randomforest" or "gradientboosting" or "extratrees"
        modelName:        name of the model to be saved
    RETURNS:
        None. The resulting classifier and the respective model parameters are saved to files.
    '''

    # STEP A: Feature Extraction:
    [features, classNames,
     _] = aF.dirsWavFeatureExtraction(listOfDirs,
                                      mtWin,
                                      mtStep,
                                      stWin,
                                      stStep,
                                      computeBEAT=computeBEAT)

    if len(features) == 0:
        print "trainSVM_feature ERROR: No data found in any input folder!"
        return

    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print "trainSVM_feature ERROR: " + listOfDirs[
                i] + " folder is empty or non-existing!"
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    if classifierType == "svm" or classifierType == "svm_rbf":
        classifierParams = numpy.array(
            [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0])
    elif classifierType == "randomforest":
        classifierParams = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifierType == "knn":
        classifierParams = numpy.array([1, 3, 5, 7, 9, 11, 13, 15])
    elif classifierType == "gradientboosting":
        classifierParams = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifierType == "extratrees":
        classifierParams = numpy.array([10, 25, 50, 100, 200, 500])

    # get optimal classifier parameter:
    bestParam = evaluateClassifier(features, classNames, 100, classifierType,
                                   classifierParams, 0, perTrain)

    print "Selected params: {0:.5f}".format(bestParam)

    C = len(classNames)
    [featuresNorm, MEAN,
     STD] = normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifierType == "svm_rbf":
        Classifier = trainSVM_RBF(featuresNew, bestParam)
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()

    elif classifierType == "randomforest":
        Classifier = trainRandomForest(featuresNew, bestParam)
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifierType == "gradientboosting":
        Classifier = trainGradientBoosting(featuresNew, bestParam)
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifierType == "extratrees":
        Classifier = trainExtraTrees(featuresNew, bestParam)
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifierType == "knn":
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        fo = open(modelName, "wb")
        cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
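A hedged usage sketch for the classification wrapper above; the import path follows the upstream pyAudioAnalysis layout, and the class directories and model name are hypothetical:

from pyAudioAnalysis import audioTrainTest as aT

aT.featureAndTrain(["data/music", "data/speech"],  # one folder of WAVs per class (hypothetical)
                   1.0, 1.0,                       # mid-term window / step (seconds)
                   0.050, 0.050,                   # short-term window / step (seconds)
                   "svm",                          # any of the classifier types handled above
                   "svmMusicSpeech",               # writes "svmMusicSpeech" and "svmMusicSpeechMEANS"
                   computeBEAT=False,
                   perTrain=0.90)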
Example #3
def featureAndTrainRegression(dir_name,
                              mt_win,
                              mt_step,
                              st_win,
                              st_step,
                              model_type,
                              model_name,
                              compute_beat=False):
    '''
    This function is a wrapper for segment-based audio feature extraction and regression model training.
    ARGUMENTS:
        dir_name:        path of directory containing the WAV files and Regression CSVs
        mt_win, mt_step:        mid-term window length and step
        st_win, st_step:        short-term window and step
        model_type:        "svm" or "knn" or "randomforest"
        model_name:        name of the model to be saved
    RETURNS:
        None. The resulting regression model and the respective model parameters are saved to files.
    '''
    # STEP A: Feature Extraction:
    [features, _,
     filenames] = aF.dirsWavFeatureExtraction([dir_name],
                                              mt_win,
                                              mt_step,
                                              st_win,
                                              st_step,
                                              compute_beat=compute_beat)
    features = features[0]
    filenames = [ntpath.basename(f) for f in filenames[0]]
    f_final = []

    # Read CSVs:
    CSVs = glob.glob(dir_name + os.sep + "*.csv")
    regression_labels = []
    regression_names = []
    f_final = []
    for c in CSVs:  # for each CSV
        cur_regression_labels = []
        f_temp = []
        # open the csv file that contains the current target value's annotations
        with open(c, 'rt') as csvfile:
            CSVreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in CSVreader:
                # if the current row contains two fields (filename, target value)
                if len(row) == 2:
                    if row[0] in filenames:  # ... and if the current filename exists in the list of filenames
                        index = filenames.index(row[0])
                        cur_regression_labels.append(float(row[1]))
                        f_temp.append(features[index, :])
                    else:
                        print("Warning: {} not found in list of files.".format(
                            row[0]))
                else:
                    print(
                        "Warning: Row with unknown format in regression file")

        f_final.append(numpy.array(f_temp))
        regression_labels.append(
            numpy.array(cur_regression_labels)
        )  # cur_regression_labels is the list of values for the current regression problem
        regression_names.append(ntpath.basename(c).replace(
            ".csv", ""))  # regression task name
    if len(features) == 0:
        print("ERROR: No data found in any input folder!")
        return

    n_feats = f_final[0].shape[1]

    # TODO: ARFF WRITE????
    # STEP B: Classifier Evaluation and Parameter Selection:
    if model_type == "svm" or model_type == "svm_rbf":
        model_params = numpy.array(
            [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0])
    elif model_type == "randomforest":
        model_params = numpy.array([5, 10, 25, 50, 100])


#    elif model_type == "knn":
#        model_params = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]);
    errors = []
    errors_base = []
    best_params = []

    for iRegression, r in enumerate(regression_names):
        # get optimal classifier parameter:
        print("Regression task " + r)
        bestParam, error, berror = evaluateRegression(
            f_final[iRegression], regression_labels[iRegression], 100,
            model_type, model_params)
        errors.append(error)
        errors_base.append(berror)
        best_params.append(bestParam)
        print("Selected params: {0:.5f}".format(bestParam))

        [features_norm, MEAN,
         STD] = normalizeFeatures([f_final[iRegression]])  # normalize features

        # STEP C: Save the model to file
        if model_type == "svm":
            classifier, _ = trainSVMregression(features_norm[0],
                                               regression_labels[iRegression],
                                               bestParam)
        if model_type == "svm_rbf":
            classifier, _ = trainSVMregression_rbf(
                features_norm[0], regression_labels[iRegression], bestParam)
        if model_type == "randomforest":
            classifier, _ = trainRandomForestRegression(
                features_norm[0], regression_labels[iRegression], bestParam)

        if model_type == "svm" or model_type == "svm_rbf" or model_type == "randomforest":
            with open(model_name + "_" + r, 'wb') as fid:
                cPickle.dump(classifier, fid)
            fo = open(model_name + "_" + r + "MEANS", "wb")
            cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            fo.close()
    return errors, errors_base, best_params
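Assuming the refactored wrapper above is imported or defined in the current scope, a sketch that captures the evaluation results it returns; the directory, model name and window sizes are illustrative only:

errors, errors_base, best_params = featureAndTrainRegression(
    "data/speech_emotion",    # hypothetical folder with WAV files plus one CSV per target
    1.0, 1.0, 0.050, 0.050,
    "svm_rbf",
    "svmEmotion",             # one model file per CSV: "svmEmotion_<task>"
    compute_beat=False)

for err, base_err, param in zip(errors, errors_base, best_params):
    print("CV error {0:.3f} (baseline {1:.3f}), selected param {2}".format(
        err, base_err, param))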
Example #4
def featureAndTrainRegression(dirName, mtWin, mtStep, stWin, stStep, modelType, modelName, computeBEAT=False):
    '''
    This function is a wrapper for segment-based audio feature extraction and regression model training.
    ARGUMENTS:
        dirName:        path of directory containing the WAV files and Regression CSVs
        mtWin, mtStep:        mid-term window length and step
        stWin, stStep:        short-term window and step
        modelType:        "svm" or "knn"
        modelName:        name of the model to be saved
    RETURNS:
        None. The resulting regression model and the respective model parameters are saved to files.
    '''
    # STEP A: Feature Extraction:
    [features, _, fileNames] = aF.dirsWavFeatureExtraction([dirName], mtWin, mtStep, stWin, stStep,
                                                           computeBEAT=computeBEAT)
    features = features[0]
    fileNames = [ntpath.basename(f) for f in fileNames[0]]

    # Read CSVs:
    CSVs = glob.glob(dirName + os.sep + "*.csv")
    regressionLabels = []
    regressionNames = []
    for c in CSVs:  # for each CSV
        # read filenames from the CSV, map them to "fileNames" and store the respective target values
        curRegressionLabels = numpy.zeros((len(fileNames), ))
        with open(c, 'rb') as csvfile:
            CSVreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in CSVreader:
                if len(row) == 2:
                    if row[0] + ".wav" in fileNames:
                        index = fileNames.index(row[0] + ".wav")
                        curRegressionLabels[index] = float(row[1])
        regressionLabels.append(
            curRegressionLabels)  # curRegressionLabels is the list of values for the current regression problem
        regressionNames.append(ntpath.basename(c).replace(".csv", ""))  # regression task name

    if len(features) == 0:
        print "ERROR: No data found in any input folder!"
        return

    numOfFeatures = features.shape[1]

    # TODO: ARFF WRITE????
    # STEP B: Classifier Evaluation and Parameter Selection:
    if modelType == "svm":
        modelParams = numpy.array([0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0])
    #    elif modelType == "knn":
    #        modelParams = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]);

    for iRegression, r in enumerate(regressionNames):
        # get optimal classifier parameter:
        print "Regression task " + r
        bestParam = evaluateRegression(features, regressionLabels[iRegression], 100, modelType, modelParams)
        print "Selected params: {0:.5f}".format(bestParam)

        [featuresNorm, MEAN, STD] = normalizeFeatures([features])  # normalize features

        # STEP C: Save the model to file
        if modelType == "svm":
            Classifier, _ = trainSVMregression(featuresNorm[0], regressionLabels[iRegression], bestParam)
            Classifier.save_model(modelName + "_" + r)
            fo = open(modelName + "_" + r + "MEANS", "wb")
            cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            fo.close()
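All of the wrappers in these examples persist the normalization statistics and window parameters as a sequence of pickles in a "MEANS" side file. A sketch of reading one back, with loads in the exact dump order; the file name is hypothetical:

import cPickle

# For the regression wrappers the file name pattern is "<modelName>_<task>MEANS";
# the classification wrappers use "<modelName>MEANS" and additionally dump
# classNames between STD and mtWin.
with open("svmEmotion_arousalMEANS", "rb") as fo:
    MEAN = cPickle.load(fo)
    STD = cPickle.load(fo)
    mtWin = cPickle.load(fo)
    mtStep = cPickle.load(fo)
    stWin = cPickle.load(fo)
    stStep = cPickle.load(fo)
    computeBEAT = cPickle.load(fo)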
Example #5
def featureAndTrain(listOfDirs, mtWin, mtStep, stWin, stStep, classifierType, modelName, computeBEAT=False,
                    perTrain=0.90):
    '''
    This function is a wrapper for segment-based audio feature extraction and classifier training.
    ARGUMENTS:
        listOfDirs:        list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mtWin, mtStep:        mid-term window length and step
        stWin, stStep:        short-term window and step
        classifierType:        "svm" or "knn"
        modelName:        name of the model to be saved
    RETURNS:
        None. The resulting classifier and the respective model parameters are saved to files.
    '''

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(listOfDirs, mtWin, mtStep, stWin, stStep,
                                                            computeBEAT=computeBEAT)

    if len(features) == 0:
        print "trainSVM_feature ERROR: No data found in any input folder!"
        return

    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print "trainSVM_feature ERROR: " + listOfDirs[i] + " folder is empty or non-existing!"
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    if classifierType == "svm":
        classifierParams = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0])
    elif classifierType == "knn":
        classifierParams = numpy.array([1, 3, 5, 7, 9, 11, 13, 15])
        # classifierParams = numpy.array([51])

    # get optimal classifier parameter:
    bestParam = evaluateClassifier(features, classNames, 100, classifierType, classifierParams, 0, perTrain)

    print "Selected params: {0:.5f}".format(bestParam)

    C = len(classNames)
    [featuresNorm, MEAN, STD] = normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
        Classifier.save_model(modelName)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifierType == "knn":
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        fo = open(modelName, "wb")
        cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
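For the kNN branch above, the model file itself is a pickle sequence rather than a pickled classifier object. A sketch of reading it back, mirroring the dump order exactly; the model name is hypothetical:

import cPickle
import numpy

with open("knnMusicSpeech", "rb") as fo:
    X = numpy.array(cPickle.load(fo))
    Y = numpy.array(cPickle.load(fo))
    MEAN = numpy.array(cPickle.load(fo))
    STD = numpy.array(cPickle.load(fo))
    classNames = cPickle.load(fo)
    K = cPickle.load(fo)              # bestParam: number of neighbours
    mtWin = cPickle.load(fo)
    mtStep = cPickle.load(fo)
    stWin = cPickle.load(fo)
    stStep = cPickle.load(fo)
    computeBEAT = cPickle.load(fo)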
Example #6
def featureAndTrain(listOfDirs,
                    mtWin,
                    mtStep,
                    stWin,
                    stStep,
                    classifierType,
                    modelName,
                    computeBEAT=False,
                    perTrain=0.90):
    '''
    This function is a wrapper for segment-based audio feature extraction and classifier training.
    ARGUMENTS:
        listOfDirs:	list of paths of directories. Each directory contains a single audio class
        mtWin, mtStep:	mid-term window length and step
        stWin, stStep:	short-term window and step
        classifierType:	"svm" or "gradientboosting"
        modelName:	name of the model to be saved (path)
    RETURNS:
        None. The resulting classifier and the respective model parameters are saved to files.
    '''

    # STEP A: Feature Extraction:
    [features, classNames,
     _] = aF.dirsWavFeatureExtraction(listOfDirs,
                                      mtWin,
                                      mtStep,
                                      stWin,
                                      stStep,
                                      computeBEAT=computeBEAT)

    if len(features) == 0:
        print "trainSVM_feature ERROR: No data found in any input folder!"
        return

    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print "trainSVM_feature ERROR: " + listOfDirs[
                i] + " folder is empty or non-existing!"
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    if classifierType == "svm":
        classifierParams = numpy.array(
            [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0])
    elif classifierType == "gradientboosting":
        classifierParams = numpy.array([10, 25, 50, 100, 200, 500])

    # get optimal classifier parameter:
    features2 = []
    for f in features:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print "NaN Found! Feature vector not used for training"
        features2.append(numpy.array(fTemp))
    features = features2

    bestParam = evaluateClassifier(features, classNames, 100, classifierType,
                                   classifierParams, 0, perTrain)

    print "Selected params: {0:.5f}".format(bestParam)

    C = len(classNames)
    [featuresNorm, MEAN,
     STD] = normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
    elif classifierType == "gradientboosting":
        Classifier = trainGradientBoosting(featuresNew, bestParam)

    if classifierType == "svm" or classifierType == "gradientboosting":
        with open(modelName, 'wb') as fid:  # save to file
            cPickle.dump(Classifier, fid)
        fo = open(modelName + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
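The per-row NaN/Inf filter at the start of STEP B in the example above can also be written with numpy boolean masking. A sketch of an equivalent helper; the function name is hypothetical, not part of the library:

import numpy

def dropNonFiniteRows(features):
    # features: list of 2D numpy arrays, one matrix of feature vectors per class
    cleaned = []
    for f in features:
        mask = numpy.all(numpy.isfinite(f), axis=1)   # rows with no NaN/Inf values
        if not mask.all():
            print "NaN Found! %d feature vectors not used for training" % int((~mask).sum())
        cleaned.append(f[mask, :])
    return cleaned

# e.g. features = dropNonFiniteRows(features) before calling evaluateClassifier(...)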