def featureAndTrainRegression(dirName, mtWin, mtStep, stWin, stStep, modelType, modelName, computeBEAT=False):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and regression model training.

    Every "*.csv" file found in dirName defines one regression task, named
    after the CSV file without its extension. Only CSV rows with exactly two
    fields (WAV file name without the ".wav" extension, target value) are
    used; rows naming a file that was not extracted are ignored. Files that
    never appear in a CSV keep a target value of 0.

    ARGUMENTS:
        dirName:        path of directory containing the WAV files and Regression CSVs
        mtWin, mtStep:  mid-term window length and step
        stWin, stStep:  short-term window and step
        modelType:      "svm" or "randomforest" ("knn" is not implemented)
        modelName:      name of the model to be saved
        computeBEAT:    forwarded unchanged to aF.dirsWavFeatureExtraction
    RETURNS:
        None. For modelType "svm" the regression model of each task is
        pickled to <modelName>_<task>, and MEAN, STD, mtWin, mtStep, stWin,
        stStep, computeBEAT are pickled (in that order) to
        <modelName>_<task>MEANS.
    RAISES:
        ValueError if modelType is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported model type.
    paramGrid = {
        "svm": [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0],
        "randomforest": [5, 10, 25, 50, 100],
        # "knn": [1, 3, 5, 7, 9, 11, 13, 15],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if modelType not in paramGrid:
        raise ValueError(
            "Unsupported modelType '{0}' (supported: {1})".format(
                modelType, ", ".join(sorted(paramGrid))))

    # STEP A: Feature Extraction:
    [features, _, fileNames] = aF.dirsWavFeatureExtraction(
        [dirName], mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT)
    features = features[0]
    fileNames = [ntpath.basename(f) for f in fileNames[0]]

    # Map each file name to its first position (same result as list.index)
    # so that every CSV row is resolved with a single dictionary lookup.
    fileIndex = {}
    for position, name in enumerate(fileNames):
        fileIndex.setdefault(name, position)

    # Read CSVs:
    regressionLabels = []
    regressionNames = []
    for c in glob.glob(dirName + os.sep + "*.csv"):          # for each CSV
        curRegressionLabels = numpy.zeros(len(fileNames))
        # NOTE(review): 'rb' is the csv-module convention on Python 2; a
        # Python 3 port would need text mode here.
        with open(c, 'rb') as csvfile:
            for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
                if len(row) != 2:
                    continue
                index = fileIndex.get(row[0] + ".wav")
                if index is not None:
                    curRegressionLabels[index] = float(row[1])
        # curRegressionLabels is the list of values for the current regression problem
        regressionLabels.append(curRegressionLabels)
        # regression task name
        regressionNames.append(ntpath.basename(c).replace(".csv", ""))

    if len(features) == 0:
        print("ERROR: No data found in any input folder!")
        return

    # TODO: ARRF WRITE????
    # STEP B: Classifier Evaluation and Parameter Selection:
    modelParams = numpy.array(paramGrid[modelType])

    for r, labels in zip(regressionNames, regressionLabels):
        # get optimal classifier parameter:
        print("Regression task " + r)
        bestParam = evaluateRegression(features, labels, 100, modelType, modelParams)
        print("Selected params: {0:.5f}".format(bestParam))

        [featuresNorm, MEAN, STD] = normalizeFeatures([features])   # normalize features

        # STEP C: Save the model to file
        # NOTE(review): only "svm" is trained and saved; for "randomforest"
        # just the parameter evaluation above runs.
        if modelType == "svm":
            Classifier, _ = trainSVMregression(featuresNorm[0], labels, bestParam)
            with open(modelName + "_" + r, 'wb') as fid:             # save to file
                cPickle.dump(Classifier, fid)
            # Context manager guarantees the file is closed even if a dump fails.
            with open(modelName + "_" + r + "MEANS", "wb") as fo:
                for item in (MEAN, STD, mtWin, mtStep, stWin, stStep, computeBEAT):
                    cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)


# TODO
def featureAndTrain(listOfDirs, mtWin, mtStep, stWin, stStep, classifierType, modelName, computeBEAT=False, perTrain=0.90):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and classifier training.

    ARGUMENTS:
        listOfDirs:     list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mtWin, mtStep:  mid-term window length and step
        stWin, stStep:  short-term window and step
        classifierType: "svm" or "svm_rbf" or "knn" or "randomforest" or "gradientboosting" or "extratrees"
        modelName:      name of the model to be saved
        computeBEAT:    forwarded unchanged to aF.dirsWavFeatureExtraction
        perTrain:       forwarded unchanged to evaluateClassifier
    RETURNS:
        None. Resulting classifier along with the respective model parameters
        are saved on files:
          - "knn": X, Y, MEAN, STD, classNames, bestParam, mtWin, mtStep,
            stWin, stStep, computeBEAT are pickled (in that order) to modelName.
          - all other types: the trained classifier is pickled to modelName,
            and MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT are pickled (in that order) to modelName + "MEANS".
        The training data is also passed to writeTrainDataToARFF.
    RAISES:
        ValueError if classifierType is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported classifier type.
    paramGrid = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "svm_rbf": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "randomforest": [10, 25, 50, 100, 200, 500],
        "knn": [1, 3, 5, 7, 9, 11, 13, 15],
        "gradientboosting": [10, 25, 50, 100, 200, 500],
        "extratrees": [10, 25, 50, 100, 200, 500],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if classifierType not in paramGrid:
        raise ValueError(
            "Unsupported classifierType '{0}' (supported: {1})".format(
                classifierType, ", ".join(sorted(paramGrid))))

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(
        listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    # NOTE(review): the feature count is read from the first class before the
    # per-folder emptiness check below runs - confirm this is safe when the
    # first folder yields no features.
    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + listOfDirs[i] + " folder is empty or non-existing!")
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    classifierParams = numpy.array(paramGrid[classifierType])

    # get optimal classifier parameter:
    bestParam = evaluateClassifier(features, classNames, 100, classifierType, classifierParams, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    [featuresNorm, MEAN, STD] = normalizeFeatures(features)        # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "knn":
        # kNN has no trained model object: the normalized training set itself
        # is stored together with the selected parameter.
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        with open(modelName, "wb") as fo:
            for item in (X, Y, MEAN, STD, classNames, bestParam,
                         mtWin, mtStep, stWin, stStep, computeBEAT):
                cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        return

    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
    elif classifierType == "svm_rbf":
        Classifier = trainSVM_RBF(featuresNew, bestParam)
    elif classifierType == "randomforest":
        Classifier = trainRandomForest(featuresNew, bestParam)
    elif classifierType == "gradientboosting":
        Classifier = trainGradientBoosting(featuresNew, bestParam)
    elif classifierType == "extratrees":
        Classifier = trainExtraTrees(featuresNew, bestParam)

    with open(modelName, 'wb') as fid:                              # save to file
        cPickle.dump(Classifier, fid)
    # Context manager guarantees the file is closed even if a dump fails.
    with open(modelName + "MEANS", "wb") as fo:
        for item in (MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT):
            cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, model_type, model_name, compute_beat=False):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and regression model training.

    Every "*.csv" file found in dir_name defines one regression task, named
    after the CSV file without its extension. Each usable CSV row holds two
    fields: a file name exactly as it appears in the extracted file list, and
    a numeric target value. A task is trained only on the files listed in its
    CSV; a warning is printed for rows with another format or an unknown file.

    ARGUMENTS:
        dir_name:        path of directory containing the WAV files and Regression CSVs
        mt_win, mt_step: mid-term window length and step
        st_win, st_step: short-term window and step
        model_type:      "svm" or "svm_rbf" or "randomforest" ("knn" is not implemented)
        model_name:      name of the model to be saved
        compute_beat:    forwarded unchanged to aF.dirsWavFeatureExtraction
    RETURNS:
        (errors, errors_base, best_params): three lists with one entry per
        regression task, holding the second, third and first value returned
        by evaluateRegression respectively. None is returned if no features
        were extracted.
        For every task the model is pickled to <model_name>_<task>, and MEAN,
        STD, mt_win, mt_step, st_win, st_step, compute_beat are pickled (in
        that order) to <model_name>_<task>MEANS.
    RAISES:
        ValueError if model_type is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported model type.
    param_grid = {
        "svm": [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0],
        "svm_rbf": [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0],
        "randomforest": [5, 10, 25, 50, 100],
        # "knn": [1, 3, 5, 7, 9, 11, 13, 15],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if model_type not in param_grid:
        raise ValueError(
            "Unsupported model_type '{0}' (supported: {1})".format(
                model_type, ", ".join(sorted(param_grid))))

    # STEP A: Feature Extraction:
    [features, _, filenames] = aF.dirsWavFeatureExtraction(
        [dir_name], mt_win, mt_step, st_win, st_step, compute_beat=compute_beat)
    features = features[0]
    filenames = [ntpath.basename(f) for f in filenames[0]]

    # Map each file name to its first position (same result as list.index)
    # so that every CSV row is resolved with a single dictionary lookup.
    file_index = {}
    for position, name in enumerate(filenames):
        file_index.setdefault(name, position)

    # Read CSVs:
    regression_labels = []
    regression_names = []
    f_final = []
    for c in glob.glob(dir_name + os.sep + "*.csv"):         # for each CSV
        cur_regression_labels = []
        f_temp = []
        # open the csv file that contains the current target value's annotations
        with open(c, 'rt') as csvfile:
            for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
                # a usable row contains two fields (filename, target value)
                if len(row) != 2:
                    print("Warning: Row with unknown format in regression file")
                    continue
                index = file_index.get(row[0])
                if index is None:
                    print("Warning: {} not found in list of files.".format(row[0]))
                    continue
                cur_regression_labels.append(float(row[1]))
                f_temp.append(features[index, :])

        f_final.append(numpy.array(f_temp))
        # cur_regression_labels is the list of values for the current regression problem
        regression_labels.append(numpy.array(cur_regression_labels))
        # regression task name
        regression_names.append(ntpath.basename(c).replace(".csv", ""))

    if len(features) == 0:
        print("ERROR: No data found in any input folder!")
        return

    # The unused feature-count lookup on f_final[0] was removed: it raised
    # IndexError whenever the directory contained no CSV file.

    # TODO: ARRF WRITE????
    # STEP B: classifier Evaluation and Parameter Selection:
    model_params = numpy.array(param_grid[model_type])

    errors = []
    errors_base = []
    best_params = []

    for r, task_features, task_labels in zip(regression_names, f_final, regression_labels):
        # get optimal classifier parameter:
        print("Regression task " + r)
        bestParam, error, berror = evaluateRegression(
            task_features, task_labels, 100, model_type, model_params)
        errors.append(error)
        errors_base.append(berror)
        best_params.append(bestParam)
        print("Selected params: {0:.5f}".format(bestParam))

        [features_norm, MEAN, STD] = normalizeFeatures([task_features])   # normalize features

        # STEP C: Save the model to file
        if model_type == "svm":
            classifier, _ = trainSVMregression(features_norm[0], task_labels, bestParam)
        elif model_type == "svm_rbf":
            classifier, _ = trainSVMregression_rbf(features_norm[0], task_labels, bestParam)
        elif model_type == "randomforest":
            classifier, _ = trainRandomForestRegression(features_norm[0], task_labels, bestParam)

        with open(model_name + "_" + r, 'wb') as fid:
            cPickle.dump(classifier, fid)
        # Context manager guarantees the file is closed even if a dump fails.
        with open(model_name + "_" + r + "MEANS", "wb") as fo:
            for item in (MEAN, STD, mt_win, mt_step, st_win, st_step, compute_beat):
                cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)

    return errors, errors_base, best_params
def featureAndTrainRegression(dirName, mtWin, mtStep, stWin, stStep, modelType, modelName, computeBEAT=False):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and regression model training.

    Every "*.csv" file found in dirName defines one regression task, named
    after the CSV file without its extension. Only CSV rows with exactly two
    fields (WAV file name without the ".wav" extension, target value) are
    used; rows naming a file that was not extracted are ignored. Files that
    never appear in a CSV keep a target value of 0.

    ARGUMENTS:
        dirName:        path of directory containing the WAV files and Regression CSVs
        mtWin, mtStep:  mid-term window length and step
        stWin, stStep:  short-term window and step
        modelType:      "svm" ("knn" is not implemented)
        modelName:      name of the model to be saved
        computeBEAT:    forwarded unchanged to aF.dirsWavFeatureExtraction
    RETURNS:
        None. The regression model of each task is stored through its
        save_model method under <modelName>_<task>, and MEAN, STD, mtWin,
        mtStep, stWin, stStep, computeBEAT are pickled (in that order) to
        <modelName>_<task>MEANS.
    RAISES:
        ValueError if modelType is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported model type.
    paramGrid = {
        "svm": [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0],
        # "knn": [1, 3, 5, 7, 9, 11, 13, 15],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if modelType not in paramGrid:
        raise ValueError(
            "Unsupported modelType '{0}' (supported: {1})".format(
                modelType, ", ".join(sorted(paramGrid))))

    # STEP A: Feature Extraction:
    [features, _, fileNames] = aF.dirsWavFeatureExtraction(
        [dirName], mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT)
    features = features[0]
    fileNames = [ntpath.basename(f) for f in fileNames[0]]

    # Map each file name to its first position (same result as list.index)
    # so that every CSV row is resolved with a single dictionary lookup.
    fileIndex = {}
    for position, name in enumerate(fileNames):
        fileIndex.setdefault(name, position)

    # Read CSVs:
    regressionLabels = []
    regressionNames = []
    for c in glob.glob(dirName + os.sep + "*.csv"):          # for each CSV
        curRegressionLabels = numpy.zeros(len(fileNames))
        # NOTE(review): 'rb' is the csv-module convention on Python 2; a
        # Python 3 port would need text mode here.
        with open(c, 'rb') as csvfile:
            for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
                if len(row) != 2:
                    continue
                index = fileIndex.get(row[0] + ".wav")
                if index is not None:
                    curRegressionLabels[index] = float(row[1])
        # curRegressionLabels is the list of values for the current regression problem
        regressionLabels.append(curRegressionLabels)
        # regression task name
        regressionNames.append(ntpath.basename(c).replace(".csv", ""))

    if len(features) == 0:
        print("ERROR: No data found in any input folder!")
        return

    # TODO: ARRF WRITE????
    # STEP B: Classifier Evaluation and Parameter Selection:
    modelParams = numpy.array(paramGrid[modelType])

    for r, labels in zip(regressionNames, regressionLabels):
        # get optimal classifier parameter:
        print("Regression task " + r)
        bestParam = evaluateRegression(features, labels, 100, modelType, modelParams)
        print("Selected params: {0:.5f}".format(bestParam))

        [featuresNorm, MEAN, STD] = normalizeFeatures([features])   # normalize features

        # STEP C: Save the model to file
        if modelType == "svm":
            Classifier, _ = trainSVMregression(featuresNorm[0], labels, bestParam)
            Classifier.save_model(modelName + "_" + r)
            # Context manager guarantees the file is closed even if a dump fails.
            with open(modelName + "_" + r + "MEANS", "wb") as fo:
                for item in (MEAN, STD, mtWin, mtStep, stWin, stStep, computeBEAT):
                    cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def featureAndTrain(listOfDirs, mtWin, mtStep, stWin, stStep, classifierType, modelName, computeBEAT=False, perTrain=0.90):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and classifier training.

    ARGUMENTS:
        listOfDirs:     list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mtWin, mtStep:  mid-term window length and step
        stWin, stStep:  short-term window and step
        classifierType: "svm" or "knn"
        modelName:      name of the model to be saved
        computeBEAT:    forwarded unchanged to aF.dirsWavFeatureExtraction
        perTrain:       forwarded unchanged to evaluateClassifier
    RETURNS:
        None. Resulting classifier along with the respective model parameters
        are saved on files:
          - "svm": the classifier is stored through its save_model method
            under modelName, and MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT are pickled (in that order) to
            modelName + "MEANS".
          - "knn": X, Y, MEAN, STD, classNames, bestParam, mtWin, mtStep,
            stWin, stStep, computeBEAT are pickled (in that order) to modelName.
        The training data is also passed to writeTrainDataToARFF.
    RAISES:
        ValueError if classifierType is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported classifier type.
    paramGrid = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0],
        "knn": [1, 3, 5, 7, 9, 11, 13, 15],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if classifierType not in paramGrid:
        raise ValueError(
            "Unsupported classifierType '{0}' (supported: {1})".format(
                classifierType, ", ".join(sorted(paramGrid))))

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(
        listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    # NOTE(review): the feature count is read from the first class before the
    # per-folder emptiness check below runs - confirm this is safe when the
    # first folder yields no features.
    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + listOfDirs[i] + " folder is empty or non-existing!")
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    classifierParams = numpy.array(paramGrid[classifierType])
    # classifierParams = numpy.array([51])

    # get optimal classifier parameter:
    bestParam = evaluateClassifier(features, classNames, 100, classifierType, classifierParams, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    [featuresNorm, MEAN, STD] = normalizeFeatures(features)        # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
        Classifier.save_model(modelName)
        # Context manager guarantees the file is closed even if a dump fails.
        with open(modelName + "MEANS", "wb") as fo:
            for item in (MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT):
                cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    elif classifierType == "knn":
        # kNN has no trained model object: the normalized training set itself
        # is stored together with the selected parameter.
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        with open(modelName, "wb") as fo:
            for item in (X, Y, MEAN, STD, classNames, bestParam,
                         mtWin, mtStep, stWin, stStep, computeBEAT):
                cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def featureAndTrain(listOfDirs, mtWin, mtStep, stWin, stStep, classifierType, modelName, computeBEAT=False, perTrain=0.90):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and classifier training.

    Feature vectors containing NaN or infinite values are discarded (with a
    printed notice) before parameter selection and training.

    ARGUMENTS:
        listOfDirs:     list of paths of directories. Each directory contains a single audio class
        mtWin, mtStep:  mid-term window length and step
        stWin, stStep:  short-term window and step
        classifierType: "svm" or "gradientboosting"
        modelName:      name of the model to be saved (path)
        computeBEAT:    forwarded unchanged to aF.dirsWavFeatureExtraction
        perTrain:       forwarded unchanged to evaluateClassifier
    RETURNS:
        None. The trained classifier is pickled to modelName, and MEAN, STD,
        classNames, mtWin, mtStep, stWin, stStep, computeBEAT are pickled (in
        that order) to modelName + "MEANS". The training data is also passed
        to writeTrainDataToARFF (before the NaN/inf filtering).
    RAISES:
        ValueError if classifierType is not one of the supported types.
    '''
    # Candidate hyper-parameter values for each supported classifier type.
    paramGrid = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "gradientboosting": [10, 25, 50, 100, 200, 500],
    }
    # Fail early and explicitly: previously an unsupported type only surfaced
    # later as a NameError on the undefined parameter array.
    if classifierType not in paramGrid:
        raise ValueError(
            "Unsupported classifierType '{0}' (supported: {1})".format(
                classifierType, ", ".join(sorted(paramGrid))))

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(
        listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    # NOTE(review): the feature count is read from the first class before the
    # per-folder emptiness check below runs - confirm this is safe when the
    # first folder yields no features.
    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]

    writeTrainDataToARFF(modelName, features, classNames, featureNames)

    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + listOfDirs[i] + " folder is empty or non-existing!")
            return

    # STEP B: Classifier Evaluation and Parameter Selection:
    classifierParams = numpy.array(paramGrid[classifierType])

    # Drop every feature vector that holds a NaN or an infinite value;
    # isfinite is False for exactly those two cases.
    cleanFeatures = []
    for f in features:
        keptRows = []
        for row in f:
            if numpy.isfinite(row).all():
                keptRows.append(row.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        cleanFeatures.append(numpy.array(keptRows))
    features = cleanFeatures

    # get optimal classifier parameter:
    bestParam = evaluateClassifier(features, classNames, 100, classifierType, classifierParams, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    [featuresNorm, MEAN, STD] = normalizeFeatures(features)        # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    # STEP C: Save the classifier to file
    if classifierType == "svm":
        Classifier = trainSVM(featuresNew, bestParam)
    elif classifierType == "gradientboosting":
        Classifier = trainGradientBoosting(featuresNew, bestParam)

    with open(modelName, 'wb') as fid:                              # save to file
        cPickle.dump(Classifier, fid)
    # Context manager guarantees the file is closed even if a dump fails.
    with open(modelName + "MEANS", "wb") as fo:
        for item in (MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT):
            cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)