Example no. 1
def evaluate(features, ClassNames, nExp, Params, parameterMode, perTrain=0.80):
    (featuresNorm, MEAN, STD) = aT.normalizeFeatures(features)
    nClasses = len(features)
    CAll = []
    acAll = []
    F1All = []
    PrecisionClassesAll = []
    RecallClassesAll = []
    ClassesAll = []
    F1ClassesAll = []
    CMsAll = []

    # compute total number of samples:
    nSamplesTotal = 0
    for f in features:
        nSamplesTotal += f.shape[0]
    if nSamplesTotal > 1000 and nExp > 50:
        nExp = 50
        print "Number of training experiments changed to 50 due to high number of samples"
    if nSamplesTotal > 2000 and nExp > 10:
        nExp = 10
        print "Number of training experiments changed to 10 due to high number of samples"

    for Ci, C in enumerate(Params):  # for each param value
        CM = numpy.zeros((nClasses, nClasses))
        for e in range(nExp):  # for each cross-validation iteration:
            print "Param = {0:.5f} - Classifier Evaluation Experiment {1:d} of {2:d}".format(
                C, e + 1, nExp)
            featuresTrain, featuresTest = aT.randSplitFeatures(
                featuresNorm, perTrain)
            Classifier = train(featuresTrain, C)

            CMt = numpy.zeros((nClasses, nClasses))
            for c1 in range(nClasses):
                nTestSamples = len(featuresTest[c1])
                Results = numpy.zeros((nTestSamples, 1))
                for ss in range(nTestSamples):
                    [Results[ss], _] = classify(Classifier,
                                                featuresTest[c1][ss])
                for c2 in range(nClasses):
                    CMt[c1][c2] = float(len(numpy.nonzero(Results == c2)[0]))
            CM = CM + CMt
        CM = CM + 0.0000000010  # add a small constant to avoid division by zero below
        Rec = numpy.zeros((CM.shape[0], ))
        Pre = numpy.zeros((CM.shape[0], ))

        for ci in range(CM.shape[0]):
            Rec[ci] = CM[ci, ci] / numpy.sum(CM[ci, :])
            Pre[ci] = CM[ci, ci] / numpy.sum(CM[:, ci])
        PrecisionClassesAll.append(Pre)
        RecallClassesAll.append(Rec)
        F1 = 2 * Rec * Pre / (Rec + Pre)
        F1ClassesAll.append(F1)
        acAll.append(numpy.sum(numpy.diagonal(CM)) / numpy.sum(CM))

        CMsAll.append(CM)
        F1All.append(numpy.mean(F1))

    print("\t\t"),
    for i, c in enumerate(ClassNames):
        if i == len(ClassNames) - 1:
            print "{0:s}\t\t".format(c),
        else:
            print "{0:s}\t\t\t".format(c),
    print("OVERALL")
    print("\tC"),
    for c in ClassNames:
        print "\tPRE\tREC\tF1",
    print "\t{0:s}\t{1:s}".format("ACC", "F1")
    bestAcInd = numpy.argmax(acAll)
    bestF1Ind = numpy.argmax(F1All)
    for i in range(len(PrecisionClassesAll)):
        print "\t{0:.3f}".format(Params[i]),
        for c in range(len(PrecisionClassesAll[i])):
            print "\t{0:.1f}\t{1:.1f}\t{2:.1f}".format(
                100.0 * PrecisionClassesAll[i][c],
                100.0 * RecallClassesAll[i][c], 100.0 * F1ClassesAll[i][c]),
        print "\t{0:.1f}\t{1:.1f}".format(100.0 * acAll[i], 100.0 * F1All[i]),
        if i == bestF1Ind:
            print "\t best F1",
        if i == bestAcInd:
            print "\t best Acc",
        print
    # note: unlike the variant below, this version ignores parameterMode and always returns the best-F1 parameter
    return Params[bestF1Ind]
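The per-class metrics here (and in the variants below) are read directly off the accumulated confusion matrix: the diagonal divided by the row sums gives recall, the diagonal divided by the column sums gives precision. A small worked sketch; the 2x2 matrix below is purely illustrative, not taken from the source:

import numpy

# Illustrative 2-class confusion matrix: rows = true class, columns = predicted class.
cm = numpy.array([[45.0, 5.0],
                  [10.0, 40.0]])
rec = numpy.diag(cm) / cm.sum(axis=1)   # per-class recall:    [0.900, 0.800]
pre = numpy.diag(cm) / cm.sum(axis=0)   # per-class precision: [0.818, 0.889]
f1 = 2 * rec * pre / (rec + pre)        # per-class F1:        [0.857, 0.842]
acc = numpy.trace(cm) / cm.sum()        # overall accuracy:    0.850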
Example no. 2
def evaluateclassifier(features, class_names, n_exp, classifier_name, Params, parameterMode, perTrain=0.90):
    '''
    ARGUMENTS:
        features:        a list ([numOfClasses x 1]) whose elements contain numpy matrices of features.
                         each matrix features[i] of class i is [n_samples x numOfDimensions]
        class_names:     list of class names (strings)
        n_exp:           number of cross-validation experiments
        classifier_name: "svm", "svm_rbf", "knn", "randomforest", "gradientboosting",
                         "extratrees" or "logisticregression"
        Params:          list of classifier parameters (for parameter tuning during cross-validation)
        parameterMode:   0: choose parameters that lead to maximum overall classification ACCURACY
                         1: choose parameters that lead to maximum overall F1 MEASURE
        perTrain:        fraction of samples used for training in each cross-validation experiment
    RETURNS:
        bestParam:       the value of the input parameter that optimizes the selected performance measure
    '''

    # feature normalization:
    (features_norm, MEAN, STD) = normalizeFeatures(features)
    #features_norm = features;
    n_classes = len(features)
    ac_all = []
    f1_all = []
    precision_classes_all = []
    recall_classes_all = []
    f1_classes_all = []
    cms_all = []

    # compute total number of samples:
    n_samples_total = 0
    for f in features:
        n_samples_total += f.shape[0]
    if n_samples_total > 1000 and n_exp > 50:
        n_exp = 50
        print("Number of training experiments changed to 50 due to high number of samples")
    if n_samples_total > 2000 and n_exp > 10:
        n_exp = 10
        print("Number of training experiments changed to 10 due to high number of samples")

    for Ci, C in enumerate(Params):
        # for each param value
        cm = numpy.zeros((n_classes, n_classes))
        for e in range(n_exp):
            # for each cross-validation iteration:
            print("Param = {0:.5f} - classifier Evaluation "
                  "Experiment {1:d} of {2:d}".format(C, e+1, n_exp))
            # split features:
            f_train, f_test = randSplitFeatures(features_norm, perTrain)
            # train multi-class svms:
            if classifier_name == "svm":
                classifier = trainSVM(f_train, C)
            elif classifier_name == "svm_rbf":
                classifier = trainSVM_RBF(f_train, C)
            elif classifier_name == "knn":
                classifier = trainKNN(f_train, C)
            elif classifier_name == "randomforest":
                classifier = trainRandomForest(f_train, C)
            elif classifier_name == "gradientboosting":
                classifier = trainGradientBoosting(f_train, C)
            elif classifier_name == "extratrees":
                classifier = trainExtraTrees(f_train, C)
            elif classifier_name == "logisticregression":
                classifier = trainLogisticRegression(f_train, C)

            cmt = numpy.zeros((n_classes, n_classes))
            for c1 in range(n_classes):
                n_test_samples = len(f_test[c1])
                res = numpy.zeros((n_test_samples, 1))
                for ss in range(n_test_samples):
                    [res[ss], _] = classifierWrapperHead(classifier,
                                                         classifier_name,
                                                         f_test[c1][ss])
                for c2 in range(n_classes):
                    cmt[c1][c2] = float(len(numpy.nonzero(res == c2)[0]))
            cm = cm + cmt
        cm = cm + 0.0000000010  # add a small constant to avoid division by zero below
        rec = numpy.zeros((cm.shape[0], ))
        pre = numpy.zeros((cm.shape[0], ))

        for ci in range(cm.shape[0]):
            rec[ci] = cm[ci, ci] / numpy.sum(cm[ci, :])
            pre[ci] = cm[ci, ci] / numpy.sum(cm[:, ci])
        precision_classes_all.append(pre)
        recall_classes_all.append(rec)
        f1 = 2 * rec * pre / (rec + pre)
        f1_classes_all.append(f1)
        ac_all.append(numpy.sum(numpy.diagonal(cm)) / numpy.sum(cm))

        cms_all.append(cm)
        f1_all.append(numpy.mean(f1))

    print("\t\t", end="")
    for i, c in enumerate(class_names):
        if i == len(class_names)-1:
            print("{0:s}\t\t".format(c), end="")
        else:
            print("{0:s}\t\t\t".format(c), end="")
    print("OVERALL")
    print("\tC", end="")
    for c in class_names:
        print("\tPRE\tREC\tf1", end="")
    print("\t{0:s}\t{1:s}".format("ACC", "f1"))
    best_ac_ind = numpy.argmax(ac_all)
    best_f1_ind = numpy.argmax(f1_all)
    for i in range(len(precision_classes_all)):
        print("\t{0:.3f}".format(Params[i]), end="")
        for c in range(len(precision_classes_all[i])):
            print("\t{0:.1f}\t{1:.1f}\t{2:.1f}".format(100.0 * precision_classes_all[i][c],
                                                       100.0 * recall_classes_all[i][c],
                                                       100.0 * f1_classes_all[i][c]), end="")
        print("\t{0:.1f}\t{1:.1f}".format(100.0 * ac_all[i], 100.0 * f1_all[i]), end="")
        if i == best_f1_ind:
            print("\t best f1", end="")
        if i == best_ac_ind:
            print("\t best Acc", end="")
        print("")

    if parameterMode == 0:    # keep parameters that maximize overall classification accuracy:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_ac_ind], class_names)
        return Params[best_ac_ind]
    elif parameterMode == 1:  # keep parameters that maximize overall f1 measure:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_f1_ind], class_names)
        return Params[best_f1_ind]
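A minimal usage sketch for the function above; the feature matrices, class names and C values are hypothetical placeholders, and the call assumes the pyAudioAnalysis-style helpers it relies on (normalizeFeatures, randSplitFeatures, trainSVM, classifierWrapperHead, ...) are available in the same module:

import numpy

# Two hypothetical classes with 50 random 20-dimensional feature vectors each.
features = [numpy.random.rand(50, 20), numpy.random.rand(50, 20)]
class_names = ["music", "speech"]
svm_c_values = [0.01, 0.1, 1.0, 5.0]   # candidate SVM C parameters to evaluate

# parameterMode=1 selects the C value that maximizes the mean F1 measure.
best_c = evaluateclassifier(features, class_names, n_exp=100,
                            classifier_name="svm", Params=svm_c_values,
                            parameterMode=1)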
Example no. 3
    def getResultMatrixAndBestParam(features, class_names, classifier_name, parameterMode, perTrain=0.90, model_name='', Params=[]):
        '''
        ARGUMENTS:
            features:        a list ([numOfClasses x 1]) whose elements contain numpy matrices of features.
                             each matrix features[i] of class i is [n_samples x numOfDimensions]
            class_names:     list of class names (strings)
            classifier_name: classifier type (e.g. svm, knn or randomforest)
            parameterMode:   AudioClassifierManager.BEST_ACCURACY: choose parameters that lead to maximum
                             overall classification ACCURACY
                             AudioClassifierManager.BEST_F1: choose parameters that lead to maximum
                             overall F1 MEASURE
            perTrain:        fraction of samples used for training in each cross-validation experiment
            model_name:      optional model name
            Params:          list of classifier parameters to evaluate; if empty, a default list for
                             classifier_name is used
        RETURNS:
            bestParam:       the value of the input parameter that optimizes the selected performance measure
            resultConfusionMatrix: the confusion matrix obtained with that parameter, followed by the
                             per-class precision/recall/F1 lists and the overall F1/accuracy lists
        '''
        # feature normalization:
        (features_norm, MEAN, STD) = aT.normalizeFeatures(features)

        n_classes = len(features)
        ac_all = []
        f1_all = []
        precision_classes_all = []
        recall_classes_all = []
        f1_classes_all = []
        cms_all = []
        smooth = 0.0000000010  # small constant added to the confusion matrix to avoid division by zero

        # Optimize the number of cross-validation experiments
        n_exp = AudioClassifierManager.getOptimalNumberExperiment(features, AudioClassifierManager.__num_experiment)

        Params = AudioClassifierManager.getListParamsForClassifierType(classifier_name) if len(Params) == 0 else Params

        # For each param value
        for Ci, C in enumerate(Params):
            # Init confusion matrix
            cm = numpy.zeros((n_classes, n_classes))
            for e in range(n_exp):
                # Split features in Train and Test:
                f_train, f_test = aT.randSplitFeatures(features_norm, perTrain)
                countFTrain = sum(len(g) for g in f_train)
                countFTest = sum(len(g) for g in f_test)

                if countFTest == 0:
                    print("WARNING: the test split is empty for this experiment")

                # for each cross-validation iteration:
                print("Param = {0:.5f} - classifier Evaluation "
                      "Experiment {1:d} of {2:d} - lenTrainingSet {3} lenTestSet {4}".format(C, e + 1, n_exp,
                                                                                                        countFTrain,
                                                                                                        countFTest))

                # Get Classifier for train
                classifier = AudioClassifierManager.getTrainClassifier(f_train, classifier_name, C)


                cmt = numpy.zeros((n_classes, n_classes))
                for c1 in range(n_classes):
                    #print("==> Class {1}: {0} for exp {2}".format(class_names[c1],c1,e))
                    n_test_samples = len(f_test[c1])
                    res = numpy.zeros((n_test_samples, 1))
                    for ss in range(n_test_samples):
                        [res[ss], _] = aT.classifierWrapper(classifier,
                                                            classifier_name,
                                                            f_test[c1][ss])
                    for c2 in range(n_classes):
                        nnzero = numpy.nonzero(res == c2)[0]
                        rlen = len(nnzero)
                        cmt[c1][c2] = float(rlen)
                        #print("cmt[{0}][{1}] = {2}".format(c1,c2,float(rlen)))
                cm = cm + cmt


            cm = cm + smooth
            rec = numpy.zeros((cm.shape[0],))
            pre = numpy.zeros((cm.shape[0],))

            # Compute per-class Precision, Recall and F1 measure
            for ci in range(cm.shape[0]):
                rec[ci] = cm[ci, ci] / numpy.sum(cm[ci, :])
                pre[ci] = cm[ci, ci] / numpy.sum(cm[:, ci])
            precision_classes_all.append(pre)
            recall_classes_all.append(rec)
            f1 = 2 * rec * pre / (rec + pre)
            f1_classes_all.append(f1)
            ac_all.append(numpy.sum(numpy.diagonal(cm)) / numpy.sum(cm))

            cms_all.append(cm)
            f1_all.append(numpy.mean(f1))


        best_ac_ind = numpy.argmax(ac_all)
        best_f1_ind = numpy.argmax(f1_all)
        bestParam = 0
        resultConfusionMatrix = None
        if parameterMode == AudioClassifierManager.BEST_ACCURACY:
            bestParam = Params[best_ac_ind]
            resultConfusionMatrix = cms_all[best_ac_ind]
        elif parameterMode == AudioClassifierManager.BEST_F1:
            bestParam = Params[best_f1_ind]
            resultConfusionMatrix = cms_all[best_f1_ind]

        return bestParam, resultConfusionMatrix, precision_classes_all, recall_classes_all, f1_classes_all, f1_all, ac_all
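A hedged usage sketch for this method, assuming getResultMatrixAndBestParam is exposed as a static method of AudioClassifierManager (its signature has no self) and that the class constants and helpers it references (BEST_F1, BEST_ACCURACY, getOptimalNumberExperiment, getListParamsForClassifierType, getTrainClassifier) are defined on that class; the feature matrices and class names are placeholders as in the previous sketch:

import numpy

# Hypothetical feature lists: two classes, 50 random 20-dimensional feature vectors each.
features = [numpy.random.rand(50, 20), numpy.random.rand(50, 20)]
class_names = ["music", "speech"]

best_param, conf_matrix, pre_all, rec_all, f1_classes, f1_all, ac_all = \
    AudioClassifierManager.getResultMatrixAndBestParam(
        features, class_names, "svm", AudioClassifierManager.BEST_F1)
print("Best parameter: {0}".format(best_param))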