def Classify(clf, featuresAll, bestParam):
    '''
    Train and return the model selected by ``clf``.

    ARGUMENTS:
        clf:          classifier identifier; one of 'svm', 'svm_rbf',
                      'extratrees', 'randomforest', 'knn' or
                      'gradientboosting'
        featuresAll:  training features, handed unchanged to the trainer
        bestParam:    classifier parameter, handed unchanged to the trainer
    RETURNS:
        model:        whatever the selected ``aT.train*`` function returns
    RAISES:
        ValueError:   if ``clf`` is not one of the identifiers listed above
    '''
    # Map each identifier to the *name* of its trainer on ``aT``; the
    # attribute is resolved only after ``clf`` has been validated, so a
    # trainer missing from ``aT`` can only fail when it is actually selected.
    trainer_names = {
        'svm': 'trainSVM',
        'svm_rbf': 'trainSVM_RBF',
        'extratrees': 'trainExtraTrees',
        'randomforest': 'trainRandomForest',
        'knn': 'trainKNN',
        'gradientboosting': 'trainGradientBoosting',
    }
    if clf not in trainer_names:
        # Without this check the function used to fall through every branch
        # and crash on an unbound local when returning.
        raise ValueError(
            "Unknown classifier {0!r}; expected one of: {1}".format(
                clf, ", ".join(sorted(trainer_names))))

    train = getattr(aT, trainer_names[clf])
    return train(featuresAll, bestParam)
def getTrainClassifier(f_train, classifier_name, param):
    '''
    Train the classifier identified by ``classifier_name``.

    ARGUMENTS:
        f_train:          training features, forwarded to the ``aT`` trainer
        classifier_name:  compared against the model-name constants stored
                          on ``AudioClassifierManager``
        param:            classifier parameter, forwarded to the trainer
    RETURNS:
        the object returned by the matching ``aT.train*`` call, or None when
        ``classifier_name`` equals none of the known model names
    '''
    # NOTE(review): the double-underscore attributes below are subject to
    # name mangling if this def lives inside a class body -- keep the literal
    # attribute syntax (no getattr with a string) so lookups stay unchanged.
    if classifier_name == AudioClassifierManager.__svmModelName:
        return aT.trainSVM(f_train, param)
    if classifier_name == AudioClassifierManager.__svmRbfModelName:
        return aT.trainSVM_RBF(f_train, param)
    if classifier_name == AudioClassifierManager.__knnModelName:
        return aT.trainKNN(f_train, param)
    if classifier_name == AudioClassifierManager.__randomforestModelName:
        return aT.trainRandomForest(f_train, param)
    if classifier_name == AudioClassifierManager.__gradientboostingModelName:
        return aT.trainGradientBoosting(f_train, param)
    if classifier_name == AudioClassifierManager.__extratreesModelName:
        return aT.trainExtraTrees(f_train, param)
    # No known model name matched.
    return None
# Trainers selectable through ``classifier_name`` in evaluateclassifier().
# Every entry is a lambda so the underlying train* function is looked up only
# when that classifier is actually requested (late binding).
_EVALUATION_TRAINERS = {
    "svm":
        lambda f_train, param: trainSVM(f_train, param),
    "svm_rbf":
        lambda f_train, param: trainSVM_RBF(f_train, param),
    "knn":
        lambda f_train, param: trainKNN(f_train, param),
    "randomforest":
        lambda f_train, param: trainRandomForest(f_train, param),
    "gradientboosting":
        lambda f_train, param: trainGradientBoosting(f_train, param),
    "extratrees":
        lambda f_train, param: trainExtraTrees(f_train, param),
    "logisticregression":
        lambda f_train, param: trainLogisticRegression(f_train, param),
}


def evaluateclassifier(features, class_names, n_exp, classifier_name, Params,
                       parameterMode, perTrain=0.90):
    '''
    Pick the best classifier parameter by repeated random-split validation.

    For every candidate value in ``Params`` the classifier is trained and
    tested ``n_exp`` times on fresh random splits; the confusion matrices of
    those runs are summed and per-class precision / recall / f1 plus overall
    accuracy are derived from the sum. A results table and the confusion
    matrix of the winning parameter are printed.

    ARGUMENTS:
        features:         a list ([numOfClasses x 1]) whose elements contain
                          numpy matrices of features. Each matrix features[i]
                          of class i is [n_samples x numOfDimensions]
        class_names:      list of class names (strings)
        n_exp:            number of cross-validation experiments; capped at
                          50 when there are more than 1000 samples in total
                          and at 10 when there are more than 2000
        classifier_name:  svm, svm_rbf, knn, randomforest, gradientboosting,
                          extratrees or logisticregression
        Params:           list of classifier parameters (for parameter
                          tuning during cross-validation)
        parameterMode:    0: choose parameters that lead to maximum overall
                             classification ACCURACY
                          1: choose parameters that lead to maximum overall
                             f1 MEASURE
        perTrain:         forwarded to randSplitFeatures (presumably the
                          fraction of samples used for training -- confirm
                          against that helper)
    RETURNS:
        bestParam:        the value of the input parameter that optimizes
                          the selected performance measure; None when
                          parameterMode is neither 0 nor 1
    RAISES:
        ValueError:       if classifier_name is not supported or Params is
                          empty
    '''
    # Fail fast: an unsupported name used to surface only as an unbound
    # local after normalization, splitting and printing had already run.
    if classifier_name not in _EVALUATION_TRAINERS:
        raise ValueError(
            "Unknown classifier_name {0!r}; expected one of: {1}".format(
                classifier_name, ", ".join(sorted(_EVALUATION_TRAINERS))))
    # With no candidates there is nothing to rank (argmax of an empty list
    # would raise a far less helpful ValueError at the very end).
    if len(Params) == 0:
        raise ValueError("Params must contain at least one value")
    train = _EVALUATION_TRAINERS[classifier_name]

    # feature normalization (the returned mean / std are not needed here):
    features_norm, _, _ = normalizeFeatures(features)
    n_classes = len(features)
    ac_all = []
    f1_all = []
    precision_classes_all = []
    recall_classes_all = []
    f1_classes_all = []
    cms_all = []

    # compute total number of samples:
    n_samples_total = sum(f.shape[0] for f in features)
    if n_samples_total > 1000 and n_exp > 50:
        n_exp = 50
        print("Number of training experiments changed to 50 due to "
              "high number of samples")
    if n_samples_total > 2000 and n_exp > 10:
        n_exp = 10
        print("Number of training experiments changed to 10 due to "
              "high number of samples")

    for C in Params:
        # for each param value: confusion matrix summed over all experiments
        cm = numpy.zeros((n_classes, n_classes))
        for e in range(n_exp):
            # for each cross-validation iteration:
            print("Param = {0:.5f} - classifier Evaluation "
                  "Experiment {1:d} of {2:d}".format(C, e+1, n_exp))
            # split features:
            f_train, f_test = randSplitFeatures(features_norm, perTrain)
            # train the selected classifier:
            classifier = train(f_train, C)

            # confusion matrix of this single experiment
            # (rows: true class, columns: predicted class)
            cmt = numpy.zeros((n_classes, n_classes))
            for c1 in range(n_classes):
                n_test_samples = len(f_test[c1])
                res = numpy.zeros((n_test_samples, 1))
                for ss in range(n_test_samples):
                    [res[ss], _] = classifierWrapperHead(classifier,
                                                         classifier_name,
                                                         f_test[c1][ss])
                for c2 in range(n_classes):
                    cmt[c1][c2] = float(len(numpy.nonzero(res == c2)[0]))
            cm = cm + cmt

        # tiny offset keeps the divisions below finite for empty rows/columns
        cm = cm + 0.0000000010
        rec = numpy.zeros((cm.shape[0], ))
        pre = numpy.zeros((cm.shape[0], ))
        for ci in range(cm.shape[0]):
            rec[ci] = cm[ci, ci] / numpy.sum(cm[ci, :])
            pre[ci] = cm[ci, ci] / numpy.sum(cm[:, ci])
        precision_classes_all.append(pre)
        recall_classes_all.append(rec)
        f1 = 2 * rec * pre / (rec + pre)
        f1_classes_all.append(f1)
        ac_all.append(numpy.sum(numpy.diagonal(cm)) / numpy.sum(cm))
        cms_all.append(cm)
        f1_all.append(numpy.mean(f1))

    # table header: class names, then the per-class / overall column titles
    print("\t\t", end="")
    last_class_ind = len(class_names) - 1
    for i, c in enumerate(class_names):
        if i == last_class_ind:
            print("{0:s}\t\t".format(c), end="")
        else:
            print("{0:s}\t\t\t".format(c), end="")
    print("OVERALL")
    print("\tC", end="")
    for _ in class_names:
        print("\tPRE\tREC\tf1", end="")
    print("\t{0:s}\t{1:s}".format("ACC", "f1"))

    best_ac_ind = numpy.argmax(ac_all)
    best_f1_ind = numpy.argmax(f1_all)

    # one table row per candidate parameter
    for i in range(len(precision_classes_all)):
        print("\t{0:.3f}".format(Params[i]), end="")
        for c in range(len(precision_classes_all[i])):
            print("\t{0:.1f}\t{1:.1f}\t{2:.1f}".format(
                100.0 * precision_classes_all[i][c],
                100.0 * recall_classes_all[i][c],
                100.0 * f1_classes_all[i][c]), end="")
        print("\t{0:.1f}\t{1:.1f}".format(100.0 * ac_all[i],
                                          100.0 * f1_all[i]), end="")
        if i == best_f1_ind:
            print("\t best f1", end="")
        if i == best_ac_ind:
            print("\t best Acc", end="")
        print("")

    if parameterMode == 0:
        # keep parameters that maximize overall classification accuracy:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_ac_ind], class_names)
        return Params[best_ac_ind]
    elif parameterMode == 1:
        # keep parameters that maximize overall f1 measure:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_f1_ind], class_names)
        return Params[best_f1_ind]
    # any other parameterMode falls through and returns None (kept as-is
    # for backward compatibility)