def validateByFrequency(self, x, y, model, numWords=10):
    """Cross-validate `model` on features chosen by word frequency.

    For each of the `self.k` folds, the data is split with
    `self.__splitDataFold`, a frequency table is built from that fold's
    training portion only, and the first `numWords` words of the table are
    used to featurize both the training and validation portions. The model
    is refit on every fold and its correct validation predictions are
    accumulated.

    Args:
        x: Raw samples, passed through to the fold splitter and to
            `FeatureSelection`.
        y: Labels aligned with `x`.
        model: Object exposing `fit(features, labels)` and
            `predict(features)`; it is refit in place on every fold.
        numWords: How many leading entries of the frequency table to use as
            features. Should be non-negative. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        The total number of correct validation predictions divided by
        `len(x)`.
        NOTE(review): this equals overall accuracy only if the folds
        together cover every sample exactly once -- confirm against
        `__splitDataFold`.
    """
    totalCorrect = 0
    for foldIndex in range(self.k):
        foldTrainX, foldTrainY, foldValidationX, foldValidationY = \
            self.__splitDataFold(x, y, foldIndex)

        # Select words from the training portion of this fold only, so the
        # validation portion never influences which features are chosen.
        # Presumably the table is ordered most-frequent first -- verify
        # against FeatureSelection.byFrequency.
        frequencyTable = FeatureSelection.byFrequency(foldTrainX)
        words = [word for word, _ in frequencyTable[:numWords]]
        print('For fold %d/%d, choose words:' % (foldIndex + 1, self.k))
        print(words)

        xNewTrain, xNewValidation = FeatureSelection.Featurize(
            foldTrainX, foldValidationX, words)
        model.fit(xNewTrain, foldTrainY)
        totalCorrect += self.__countCorrect(
            model.predict(xNewValidation), foldValidationY)

    return totalCorrect / len(x)
# Report accuracy for yTestPredicted, which is computed earlier in the script
# (outside this section).
testAccuracy = EvaluationsStub.Accuracy(yTest, yTestPredicted)
print("Test Set Accuracy is %f" % (testAccuracy))

# Refit the model on xTrain/yTrain and score it again on xTest/yTest.
print("Train with all 5 features")
model.fit(xTrain, yTrain, iterations=50000, step=0.01)
yTestPredicted = model.predict(xTest)
testAccuracy = EvaluationsStub.Accuracy(yTest, yTestPredicted)
print("Test Set Accuracy is %f" % (testAccuracy))

############################
# Kept at its original position in the script; the top of the file is not
# visible from here, so the import is not relocated.
import FeatureSelection

print('### Get the Frequency Table')
frequencyTable = FeatureSelection.byFrequency(xTrainRaw)
print('Top 10')
# Slice instead of indexing 0..9 so a table with fewer than 10 entries prints
# what it has rather than raising IndexError.
for entry in frequencyTable[:10]:
    print(entry)

#############################
print('### Get the Mutual Information Table')
mutualInformationTable = FeatureSelection.byMutualInformation(
    xTrainRaw, yTrain)
print('Top 10')
# Same guard as above for short tables.
for entry in mutualInformationTable[:10]:
    print(entry)

#############################