def saveClassifierModel(features, model_name, classifier_type, classifier,
                        MEAN, STD, classNames, bestParam):
    '''
    Persist a trained classifier and its metadata to disk using pickle.

    ARGUMENTS:
        - features:         per-class feature matrices; only used for "knn",
                            where they are converted with
                            aT.listOfFeatures2Matrix and stored as the model
        - model_name:       path of the model file to write
        - classifier_type:  "knn" or one of the AudioClassifierManager model
                            name constants (svm, svm_rbf, randomforest,
                            gradientboosting, extratrees)
        - classifier:       the trained classifier object (ignored for "knn")
        - MEAN, STD:        normalization statistics stored next to the model
        - classNames:       class labels stored next to the model
        - bestParam:        selected hyper-parameter (stored for "knn" only)

    RETURNS:
        None. For "knn" a single file `model_name` is written. For the other
        supported types the classifier goes to `model_name` and the metadata
        to `model_name + "MEANS"`. Any other classifier_type writes nothing.

    NOTE(review): the double-underscore AudioClassifierManager attributes are
    kept exactly as written; they only resolve through name mangling if this
    function is defined inside that class body - confirm against the caller.
    '''
    if classifier_type == "knn":
        [X, Y] = aT.listOfFeatures2Matrix(features)
        # Collect everything before touching the file system, so a failing
        # getter cannot leave a truncated model file behind.
        records = [
            X.tolist(),
            Y.tolist(),
            MEAN,
            STD,
            classNames,
            bestParam,
            AudioClassifierManager.getMtWin(),
            AudioClassifierManager.getMtStep(),
            AudioClassifierManager.getStWin(),
            AudioClassifierManager.getStStep(),
            AudioClassifierManager.__compute_beat,
        ]
        # The context manager closes the handle even when a dump raises.
        with open(model_name, "wb") as fo:
            for record in records:
                cPickle.dump(record, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    elif classifier_type == AudioClassifierManager.__svmModelName or \
            classifier_type == AudioClassifierManager.__svmRbfModelName or \
            classifier_type == AudioClassifierManager.__randomforestModelName or \
            classifier_type == AudioClassifierManager.__gradientboostingModelName or \
            classifier_type == AudioClassifierManager.__extratreesModelName:
        records = [
            MEAN,
            STD,
            classNames,
            AudioClassifierManager.getMtWin(),
            AudioClassifierManager.getMtStep(),
            AudioClassifierManager.getStWin(),
            AudioClassifierManager.getStStep(),
            AudioClassifierManager.__compute_beat,
        ]
        # The classifier itself is pickled with the default protocol, exactly
        # as before, so existing loaders keep working.
        with open(model_name, 'wb') as fid:
            cPickle.dump(classifier, fid)
        with open(model_name + "MEANS", "wb") as fo:
            for record in records:
                cPickle.dump(record, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def train(featuresNew, param):
    '''
    Fit a multi-layer perceptron classifier on per-class feature matrices.

    ARGUMENTS:
        - featuresNew:  per-class feature matrices, flattened into a sample
                        matrix and label vector by aT.listOfFeatures2Matrix
        - param:        size of the second hidden layer (the first hidden
                        layer always has 5 units)

    RETURNS:
        - the fitted MLPClassifier
    '''
    samples, labels = aT.listOfFeatures2Matrix(featuresNew)

    mlp = MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(5, param),
        random_state=1,  # fixed seed, same as before, for repeatable fits
    )
    mlp.fit(samples, labels)
    return mlp
def train(files):
    '''
    Extract features from directories of WAV files and fit a linear SVM.

    ARGUMENTS:
        - files:  passed as the first argument to aF.dirsWavFeatureExtraction
                  (presumably a list of per-class directories - confirm
                  against the caller)

    RETURNS:
        - (classifier, MEAN, STD): the fitted sklearn SVC together with the
          normalization statistics returned by aT.normalizeFeatures
    '''
    # Step 1: feature extraction (class names and file names are not needed).
    extraction = aF.dirsWavFeatureExtraction(
        files, 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep)
    raw_features, _class_names, _file_names = extraction

    # Step 2: normalization, then flatten to a sample matrix / label vector.
    normalized, mean, std = aT.normalizeFeatures(raw_features)
    samples, labels = aT.listOfFeatures2Matrix(normalized)

    # Step 3: linear SVM with probability estimates enabled.
    svm = sklearn.svm.SVC(kernel='linear', probability=True)
    svm.fit(samples, labels)

    return svm, mean, std
def trainLogisticRegression(features, Cparam):
    '''
    Train a multi-class probabilistic Logistic Regression classifier.
    This is a thin wrapper around sklearn's logistic regression training.

    ARGUMENTS:
        - features:  a list ([numOfClasses x 1]) whose elements contain numpy
                     matrices of features; each matrix features[i] of class i
                     is [n_samples x numOfDimensions]
        - Cparam:    Logistic Regression parameter C (inverse of the
                     regularization strength)

    RETURNS:
        - the trained one-vs-rest logistic regression model
    '''
    samples, labels = listOfFeatures2Matrix(features)

    model = sklearn.linear_model.LogisticRegression(
        C=Cparam,
        multi_class="ovr",
    )
    model.fit(samples, labels)
    return model
def _drop_non_finite_rows(features):
    '''Return per-class matrices with every row containing NaN or Inf removed.'''
    cleaned = []
    for class_features in features:
        kept_rows = []
        for row in class_features:
            if numpy.isfinite(row).all():
                kept_rows.append(row.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        cleaned.append(numpy.array(kept_rows))
    return cleaned


def _pickle_sequence(path, objects):
    '''Write objects to path as consecutive pickles, always closing the file.'''
    with open(path, "wb") as fo:
        for obj in objects:
            cPickle.dump(obj, fo, protocol=cPickle.HIGHEST_PROTOCOL)


def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step,
                    classifier_type, model_name,
                    compute_beat=False, perTrain=0.90, feats=["gfcc", "mfcc"]):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and classifier training.

    ARGUMENTS:
        list_of_dirs:       list of paths of directories. Each directory
                            contains a single audio class whose samples are
                            stored in separate WAV files.
        mt_win, mt_step:    mid-term window length and step
        st_win, st_step:    short-term window and step
        classifier_type:    "svm", "svm_rbf", "knn", "randomforest",
                            "gradientboosting", "extratrees" or
                            "logisticregression"
        model_name:         name of the model to be saved
        compute_beat:       forwarded to the feature extractor and stored
                            with the model
        perTrain:           forwarded to evaluateclassifier
        feats:              feature families forwarded to the extractor

    RETURNS:
        None. Resulting classifier along with the respective model
        parameters are saved on files: for "knn" everything goes to
        `model_name`; for the other types the classifier goes to
        `model_name` and the metadata to `model_name + "MEANS"`.

    RAISES:
        ValueError: if classifier_type is not one of the supported names.
    '''
    tree_counts = [10, 25, 50, 100, 200, 500]
    param_grids = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "svm_rbf": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "randomforest": tree_counts,
        "knn": [1, 3, 5, 7, 9, 11, 13, 15],
        "gradientboosting": tree_counts,
        "extratrees": tree_counts,
        "logisticregression": [0.01, 0.1, 1, 5],
    }
    # Reject unsupported types before the expensive feature extraction rather
    # than failing later on an unbound parameter grid.
    if classifier_type not in param_grids:
        raise ValueError(
            "featureAndTrain: unsupported classifier_type {0!r}; expected "
            "one of {1}".format(classifier_type, sorted(param_grids)))
    classifier_par = numpy.array(param_grids[classifier_type])

    # Hand the extractor its own list so the shared default is never mutated.
    if isinstance(feats, list):
        feats = list(feats)

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(
        list_of_dirs, mt_win, mt_step, st_win, st_step,
        compute_beat=compute_beat, feats=feats)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    # Check every class before reading features[0].shape, so an empty first
    # folder produces the intended message instead of an IndexError.
    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + list_of_dirs[i] +
                  " folder is empty or non-existing!")
            return

    n_feats = features[0].shape[1]
    feature_names = ["features" + str(d + 1) for d in range(n_feats)]
    writeTrainDataToARFF(model_name, features, classNames, feature_names)

    # STEP B: classifier Evaluation and Parameter Selection:
    features = _drop_non_finite_rows(features)
    bestParam = evaluateclassifier(features, classNames, 300, classifier_type,
                                   classifier_par, 0, perTrain)
    print("Selected params: {0:.5f}".format(bestParam))

    [features_norm, MEAN, STD] = normalizeFeatures(features)
    MEAN = MEAN.tolist()
    STD = STD.tolist()

    # STEP C: Train the final model and save it to file
    window_meta = [mt_win, mt_step, st_win, st_step, compute_beat]

    if classifier_type == "knn":
        # kNN has no fitted object: the normalized training set is the model.
        [X, Y] = listOfFeatures2Matrix(features_norm)
        _pickle_sequence(
            model_name,
            [X.tolist(), Y.tolist(), MEAN, STD, classNames, bestParam]
            + window_meta)
        return

    if classifier_type == "svm":
        classifier = trainSVM(features_norm, bestParam)
    elif classifier_type == "svm_rbf":
        classifier = trainSVM_RBF(features_norm, bestParam)
    elif classifier_type == "randomforest":
        classifier = trainRandomForest(features_norm, bestParam)
    elif classifier_type == "gradientboosting":
        classifier = trainGradientBoosting(features_norm, bestParam)
    elif classifier_type == "extratrees":
        classifier = trainExtraTrees(features_norm, bestParam)
    else:  # "logisticregression" (the only remaining validated type)
        classifier = trainLogisticRegression(features_norm, bestParam)

    # The classifier is pickled with the default protocol, as before.
    with open(model_name, 'wb') as fid:
        cPickle.dump(classifier, fid)
    _pickle_sequence(model_name + "MEANS",
                     [MEAN, STD, classNames] + window_meta)
#dirs= ["C:\\Users\\zacha\\PycharmProjects\\untitled\\samples"] data = [] ids = [] for i in xrange(0, len(dirs)): # Iterate through each test directory dir = dirs[i] os.chdir(dir) for file in glob.glob("*.npy"): features = numpy.load(file) ids.append(i) temp = [] for f in features: temp.append(f[0]) data.append(temp) data = numpy.array(data) [X, Y] = listOfFeatures2Matrix(data) kmeans = KMeans(n_clusters=len(dirs)).fit(X, Y) zero = [0, 0, 0] one = [0, 0, 0] two = [0, 0, 0] #pickle.dump(kmeans, 'birds.km') assert len(ids) == len(kmeans.labels_) assert len(ids) == 180 for i in xrange(0, len(ids)): if ids[i] == 0: zero[kmeans.labels_[i]] += 1 elif ids[i] == 1: one[kmeans.labels_[i]] += 1 elif ids[i] == 2: two[kmeans.labels_[i]] += 1 else: