def runWithIGR(featureSize, modelCount):
    """Train an ensemble of undersampled Naive Bayes models on IGR-reduced
    features and write test-set predictions to the results file.

    featureSize -- number of features retained by the information-gain reducer
    modelCount  -- number of ensemble members, each fit on its own random
                   undersample of the training data
    """
    X_raw, y = common.loadTrainingDataSet()

    # Fit the reducer on the full training set, then shrink to featureSize.
    igr = InformationGainReducer()
    igr.fit(X_raw, y)
    igr.resize(featureSize)
    X = igr.transform(X_raw).toarray()

    ensemble = []
    for memberIdx in range(modelCount):
        sampler = RandomUnderSampler(random_state=42 + memberIdx)
        X_balanced, y_balanced = sampler.fit_resample(X, y)

        member = NaiveBayesClassifier()
        member.fit(X_balanced, y_balanced)
        ensemble.append(member)

    # Reduce the test set with the same fitted reducer, then predict.
    X_test = igr.transform(common.loadTestDataSet()).toarray()
    common.writeResultsFile(common.predictCombinedSimple(X_test, ensemble))
    print("Done predicting with multi-model and IGR.")
def tuneMultimodelKnnIgr(featureSizes, kValues):
    """Grid-search IGR feature size and KNN neighbour count via 5-fold CV.

    For every (featureSize, kValue) pair an 11-member ensemble of
    Manhattan-metric KNN classifiers is trained, each on its own random
    undersample, and the mean F1 across folds is printed at the end.
    """
    X_raw, y_raw = common.loadTrainingDataSet()

    # One list of per-fold F1 scores per parameter combination.
    scoreMap = {(fs, k): [] for fs in featureSizes for k in kValues}

    kf = KFold(n_splits=5, random_state=42, shuffle=True)

    for foldNumber, (train_index, test_index) in enumerate(kf.split(X_raw)):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]

        # Fit the reducer once per fold; resize() below reuses this fit.
        reducer = InformationGainReducer()
        reducer.fit(X_train, y_train)

        for featureSize in featureSizes:
            reducer.resize(featureSize)
            X_train_reduced = reducer.transform(X_train).toarray()
            X_test_reduced = reducer.transform(X_test).toarray()

            for kValue in kValues:
                ensemble = []
                for modelNum in range(11):
                    # Seed varies with both member index and feature size.
                    sampler = RandomUnderSampler(
                        random_state=555 + (modelNum * featureSize))
                    X_model, y_model = sampler.fit_resample(
                        X_train_reduced, y_train)

                    knn = KNeighborsClassifier(n_neighbors=kValue,
                                               metric='manhattan')
                    knn.fit(X_model, y_model)
                    ensemble.append(knn)
                    print(".", end="")

                output = common.predictCombinedSimple(X_test_reduced,
                                                      ensemble)
                foldScore = f1_score(y_test, output)
                scoreMap[(featureSize, kValue)].append(foldScore)

                print()
                print(f"Done with kValue = {kValue} for fold #{foldNumber}"
                      f" for feature size = {featureSize}"
                      f". F1 = {foldScore}")

            print(f"Done with fold #{foldNumber}"
                  f" for feature size = {featureSize}")

    for featureSize in featureSizes:
        for kValue in kValues:
            meanF1Score = mean(scoreMap[(featureSize, kValue)])
            print(f"F1 Score for KNN with IGR, K = {kValue}"
                  f" and FR size = {featureSize} is: {meanF1Score}")
# Ejemplo n.º 3
def tuneNaiveBayesIgrFeatureSize(featureSizeList, modelCountList):
    """Cross-validate undersampled Naive Bayes ensembles over a grid of
    IGR feature sizes and ensemble sizes, printing the mean F1 per pair.

    featureSizeList -- candidate feature counts for the IGR reducer
    modelCountList  -- candidate ensemble sizes (members per ensemble)

    NOTE(review): the reducer is fitted on the full training set *before*
    the KFold split, so feature selection sees the held-out folds
    (potential leakage). Compare tuneMultimodelKnnIgr, which refits the
    reducer inside each fold — confirm which behavior is intended.
    """
    X_raw, y = common.loadTrainingDataSet()

    reducer = InformationGainReducer()
    reducer.fit(X_raw, y)

    for featureSize in featureSizeList:
        reducer.resize(featureSize)
        X = reducer.transform(X_raw).toarray()

        for modelCount in modelCountList:
            kf = KFold(n_splits=5, random_state=42, shuffle=True)
            f1ScoreList = []

            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                modelList = []
                for modelNum in range(modelCount):
                    # Each member trains on its own balanced undersample.
                    rus = RandomUnderSampler(random_state=42 + modelNum)
                    X_model, y_model = rus.fit_resample(X_train, y_train)

                    nbClassifier = NaiveBayesClassifier()
                    nbClassifier.fit(X_model, y_model)
                    modelList.append(nbClassifier)

                combinedModelOutput = common.predictCombinedSimple(
                    X_test, modelList)
                f1ScoreList.append(f1_score(y_test, combinedModelOutput))

            print("F1 Score for FR size = " + str(featureSize) +
                  " and model count = " + str(modelCount) + " is: " +
                  str(mean(f1ScoreList)))
# Ejemplo n.º 4
def tuneMultimodelIGR(featureSizes):
    """5-fold CV of an 11-member undersampled Naive Bayes ensemble on
    IGR-reduced features, printing the mean F1 for every feature size.

    featureSizes -- candidate feature counts for the information-gain reducer
    """
    X_raw, y_raw = common.loadTrainingDataSet()

    scoreMap = dict()
    for featureSize in featureSizes:
        scoreMap[featureSize] = []

    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    foldNumber = 0

    for train_index, test_index in kf.split(X_raw):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]

        # Fit the reducer on this fold's training split only (no leakage).
        reducer = InformationGainReducer()
        reducer.fit(X_train, y_train)

        for featureSize in featureSizes:
            reducer.resize(featureSize)
            X_train_reduced = reducer.transform(X_train).toarray()

            modelList = []

            for modelNum in range(11):
                # Distinct seed per ensemble member.
                rus = RandomUnderSampler(random_state=555 + modelNum)
                X_model, y_model = rus.fit_resample(X_train_reduced, y_train)

                nbClassifier = NaiveBayesClassifier()
                nbClassifier.fit(X_model, y_model)

                modelList.append(nbClassifier)
                print(".", end="")

            X_test_reduced = reducer.transform(X_test).toarray()
            output = common.predictCombinedSimple(X_test_reduced, modelList)
            combinedModelScore = f1_score(y_test, output)
            scoreMap[featureSize].append(combinedModelScore)

            print()
            print("Done with fold #" + str(foldNumber) +
                  " for feature size = " + str(featureSize) + ". F1 = " +
                  str(combinedModelScore))

        foldNumber += 1

    for featureSize in featureSizes:
        meanF1Score = mean(scoreMap[featureSize])
        # BUG FIX: the summary previously claimed "NN with Chi2", but this
        # routine evaluates Naive Bayes with information-gain reduction.
        print("F1 Score for NB with IGR and FR size = " + str(featureSize) +
              " is: " + str(meanF1Score))
# Ejemplo n.º 5
def tuneMultimodelSvm(featureSizes):
    """5-fold CV of an 11-member undersampled SVM ensemble on
    TruncatedSVD-reduced features, printing mean F1 per feature size.
    """
    X_raw, y_raw = common.loadTrainingDataSet()

    # Per-feature-size list of fold F1 scores.
    scoreMap = {fs: [] for fs in featureSizes}

    splitter = KFold(n_splits=5, random_state=42, shuffle=True)

    for foldNumber, (train_index, test_index) in enumerate(
            splitter.split(X_raw)):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]

        for featureSize in featureSizes:
            # A fresh SVD projection is fitted per fold and feature size.
            svd = TruncatedSVD(n_components=featureSize)
            X_train_reduced = svd.fit_transform(X_train)

            ensemble = []
            for modelNum in range(11):
                # Seed varies with both member index and feature size.
                sampler = RandomUnderSampler(
                    random_state=555 + (modelNum * featureSize))
                X_model, y_model = sampler.fit_resample(
                    X_train_reduced, y_train)

                svm = SVC(gamma='scale')
                svm.fit(X_model, y_model)
                ensemble.append(svm)
                print(".", end="")

            output = common.predictCombinedSimple(
                svd.transform(X_test), ensemble)
            foldScore = f1_score(y_test, output)
            scoreMap[featureSize].append(foldScore)

            print()
            print(f"Done with fold #{foldNumber}"
                  f" for feature size = {featureSize}. F1 = {foldScore}")

    for featureSize in featureSizes:
        meanF1Score = mean(scoreMap[featureSize])
        print("F1 Score for SVM with Truncated SVD and FR size = " +
              str(featureSize) + " is: " + str(meanF1Score))