def validation_core(i, x, y, model, feature_count):
            """Train and score `model` on fold `i`, returning the correct count.

            The fold's training part is ranked with
            FeatureSelection.byMutualInformation, the words of the first
            `feature_count` entries are kept, and both parts of the fold are
            featurized with those words before fitting and predicting.

            NOTE: `self` is not a parameter of this function; it has to be
            supplied by the enclosing scope.

            Returns whatever self.__countCorrect yields for the validation
            predictions against the validation labels.
            """
            fold = self.__splitDataFold(x, y, i)
            (trainX, trainY, validationX, validationY) = fold

            # Rank on the training part only; keep just the word of each entry.
            ranking = FeatureSelection.byMutualInformation(trainX, trainY)
            selected = [term for term, _ in ranking[:feature_count]]

            featurized = FeatureSelection.Featurize(trainX, validationX, selected)
            (trainFeatures, validationFeatures) = featurized

            model.fit(trainFeatures, trainY)
            predictions = model.predict(validationFeatures)
            return self.__countCorrect(predictions, validationY)
    def validateByMutualInformation(self, x, y, model, featureCount=10):
        """Cross-validate `model` using per-fold mutual-information features.

        For each of the self.k folds the data is split with
        self.__splitDataFold, the fold's training part is ranked with
        FeatureSelection.byMutualInformation, and the words of the first
        `featureCount` ranked entries are used to featurize both parts of the
        fold. The model is refit on every fold and its correct validation
        predictions are accumulated.

        Args:
            x: Raw samples; len(x) is the denominator of the accuracy.
            y: Labels passed alongside `x` to the fold splitter.
            model: Object exposing fit(x, y) and predict(x).
            featureCount: How many top-ranked words to keep per fold.
                Defaults to 10, the value that used to be hard-coded.

        Returns:
            Total number of correct validation predictions over all folds
            divided by len(x).
        """
        totalCorrect = 0

        for i in range(self.k):
            (foldTrainX, foldTrainY, foldValidationX,
             foldValidationY) = self.__splitDataFold(x, y, i)

            # Feature selection only sees the fold's training part.
            mutualInformationTable = FeatureSelection.byMutualInformation(
                foldTrainX, foldTrainY)
            words = [word for word, _ in mutualInformationTable[:featureCount]]
            print('For fold %d/%d, choose words:' % (i + 1, self.k))
            print(words)
            (xNewTrain, xNewValidation) = FeatureSelection.Featurize(
                foldTrainX, foldValidationX, words)

            model.fit(xNewTrain, foldTrainY)
            totalCorrect += self.__countCorrect(model.predict(xNewValidation),
                                                foldValidationY)

        accuracy = totalCorrect / len(x)

        return accuracy
############################

import FeatureSelection

print('### Get the Frequency Table')

# Rank the raw training samples with FeatureSelection.byFrequency.
frequencyTable = FeatureSelection.byFrequency(xTrainRaw)
print('Top 10')
# Iterate over a slice instead of indexing 0..9: a table with fewer than ten
# entries then prints what it has rather than raising IndexError.
for entry in frequencyTable[:10]:
    print(entry)

#############################

print('### Get the Mutual Information Table')

# Rank using the raw training samples together with their labels.
mutualInformationTable = FeatureSelection.byMutualInformation(
    xTrainRaw, yTrain)
print('Top 10')
# Iterate over a slice instead of indexing 0..9: a table with fewer than ten
# entries then prints what it has rather than raising IndexError.
for entry in mutualInformationTable[:10]:
    print(entry)

#############################

print('### Run Gradient Descent with the Top 10 Words by Frequency')
# Keep only the word from each of the first ten frequencyTable entries.
words = [word for word, _ in frequencyTable[:10]]
print(words)
(xNewTrain, xNewTest) = FeatureSelection.Featurize(xTrainRaw, xTestRaw, words)

# Fit on the selected-word features, then score the held-out test set.
model.fit(xNewTrain, yTrain, iterations=50000, step=0.01)
yTestPredicted = model.predict(xNewTest)
testAccuracy = EvaluationsStub.Accuracy(yTest, yTestPredicted)
print("Test Set Accuracy is %f" % (testAccuracy))
# sum/len is a fraction; scale by 100 so the number matches the word
# "percent" in the message.
# NOTE(review): assumes labels are numeric with 1 meaning spam — confirm.
print("Train is %f percent spam." % (100.0 * sum(yTrainRaw) / len(yTrainRaw)))
print("Test is %f percent spam." % (100.0 * sum(yTestRaw) / len(yTestRaw)))

# Featurize the raw train/test inputs with Assignment1Support.
xTrain, xTest = Assignment1Support.Featurize(xTrainRaw, xTestRaw)
# The labels are reused as-is: these are new names for the same objects.
yTrain, yTest = yTrainRaw, yTestRaw

import LogisticRegressionModel
# Model instance constructed with no arguments.
model = LogisticRegressionModel.LogisticRegressionModel()

#############################

import FeatureSelection

print('### Get the Mutual Information Table')
# Ranking is computed from xRaw/yRaw here, not from the train split.
mutualInformationTable = FeatureSelection.byMutualInformation(xRaw, yRaw)

#############################

import EvaluationsStub

print('### Get the Confusion Matrix')
# Keep only the word from each of the first ten ranked entries.
topEntries = mutualInformationTable[:10]
words = [term for term, _ in topEntries]
print(words)
xNewTrain, xNewTest = FeatureSelection.Featurize(xTrainRaw, xTestRaw, words)

# Refit on the selected-word features and predict the test set.
model.fit(xNewTrain, yTrain, iterations=50000, step=0.01)
yTestPredicted = model.predict(xNewTest)

# Hand true and predicted labels to the evaluation helpers.
EvaluationsStub.ExecuteAll(yTest, yTestPredicted)