Example #1
0
    def test_makeTrainingTestingSetBadPercentage(self):
        """A training percentage of 2 must make the data-set builder return None."""
        targetStatus = 'Senior'
        outOfRangePercentage = 2

        # Two queries on the same status: one plain, one with useNot=True
        # (presumably the scholarships that do NOT require it -- confirm
        # against GetDatabaseInfoScholarshipsWithClassStatuses).
        matchingDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=targetStatus)
        otherDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=targetStatus, useNot=True)

        matchingIds = matchingDB.getScholarshipsWithClassStatusIdsList()
        matchingTexts = matchingDB.getConcatenatedDescriptionsEligibilities()
        otherIds = otherDB.getScholarshipsWithClassStatusIdsList()
        otherTexts = otherDB.getConcatenatedDescriptionsEligibilities()

        builderArguments = dict(
            firstLabel=targetStatus,
            secondLabel='Other',
            firstLabelTextList=matchingTexts,
            secondLabelTextList=otherTexts,
            firstLabelIdsList=matchingIds,
            secondLabelIdsList=otherIds,
            trainingPercentage=outOfRangePercentage,
        )
        splitResult = MakeDataSet.makeBinaryLabelTrainingAndTestingSet(
            **builderArguments)

        self.assertIsNone(splitResult)
    def __init__(self, classStatus, trainingPercentage, modelSaveFile=None, featuresValuesCountsSaveFile=None):
        """Build the training/testing sets for one class status and set up an untrained classifier.

        classStatus: label passed to the database queries as ``requirementNeeded``
            and used as the first (positive) label of the data set.
        trainingPercentage: value forwarded to
            ``MakeDataSet.makeBinaryLabelTrainingAndTestingSet`` to size the training set.
        modelSaveFile: stored on the instance as ``modelSaveFile`` (not used here).
        featuresValuesCountsSaveFile: stored on the instance as
            ``featuresValueCountsSaveFile`` (not used here).

        Raises ValueError when the data-set builder returns None for the
        given ``trainingPercentage``.
        """
        self.classStatus = classStatus
        self.trainingPercentage = trainingPercentage
        self.badLabel = 'Other'
        self.modelSaveFile = modelSaveFile
        self.featuresValueCountsSaveFile = featuresValuesCountsSaveFile

        print('Creating datasets...')
        goodClassStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=self.classStatus)
        badClassStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=self.classStatus, useNot=True)

        goodClassStatusIds = goodClassStatusDB.getScholarshipsWithClassStatusIdsList()
        goodClassStatusDataTextList = goodClassStatusDB.getConcatenatedDescriptionsEligibilities()
        badClassStatusIds = badClassStatusDB.getScholarshipsWithClassStatusIdsList()
        badClassStatusDataTextList = badClassStatusDB.getConcatenatedDescriptionsEligibilities()

        # Honour the caller's trainingPercentage; it used to be stored but
        # silently replaced by a hard-coded 0.8 in this call.
        trainingTestingList = MakeDataSet.makeBinaryLabelTrainingAndTestingSet(
            firstLabel=self.classStatus, secondLabel=self.badLabel, firstLabelTextList=goodClassStatusDataTextList,
            secondLabelTextList=badClassStatusDataTextList, firstLabelIdsList=goodClassStatusIds,
            secondLabelIdsList=badClassStatusIds, trainingPercentage=self.trainingPercentage)
        # The builder can return None (the bad-percentage unit test relies on
        # that); fail with a clear message rather than on the indexing below.
        if trainingTestingList is None:
            raise ValueError(
                'MakeDataSet.makeBinaryLabelTrainingAndTestingSet returned None for trainingPercentage=%r'
                % (self.trainingPercentage,))
        self.training = trainingTestingList[0]
        self.testing = trainingTestingList[1]

        # Called only after self.training / self.testing have been assigned.
        self.dataFrame = self.makeDataFrame()

        self.trainingVectors = []
        self.testingVectors = []

        self.featuresValueCountsIndexes = []
        self.logisticRegressionClassifier = LogisticRegression()
Example #3
0
    def test_makeTrainingTestingSets(self):
        """Check the sizes of the training/testing split built with a 0.9 training percentage."""
        targetStatus = 'Junior'
        trainingFraction = 0.9

        matchingDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=targetStatus)
        otherDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=targetStatus, useNot=True)

        matchingIds = matchingDB.getScholarshipsWithClassStatusIdsList()
        matchingTexts = matchingDB.getConcatenatedDescriptionsEligibilities()
        otherIds = otherDB.getScholarshipsWithClassStatusIdsList()
        otherTexts = otherDB.getConcatenatedDescriptionsEligibilities()

        split = MakeDataSet.makeBinaryLabelTrainingAndTestingSet(
            firstLabel=targetStatus,
            secondLabel='Other',
            firstLabelTextList=matchingTexts,
            secondLabelTextList=otherTexts,
            firstLabelIdsList=matchingIds,
            secondLabelIdsList=otherIds,
            trainingPercentage=trainingFraction)
        trainingSet, testingSet = split[0], split[1]
        combinedSize = len(trainingSet) + len(testingSet)

        # The training part must hold the requested share, rounded up.
        expectedTrainingSize = math.ceil(combinedSize * trainingFraction)
        self.assertEqual(expectedTrainingSize, len(trainingSet))

        # Every input text must end up in exactly one of the two sets.
        expectedCombinedSize = len(matchingTexts) + len(otherTexts)
        self.assertEqual(expectedCombinedSize, combinedSize)
Example #4
0
    def test_runEnsembleClassifier(self):
        """Smoke test: build the pretrained ensemble classifier and display its results."""
        # No requirement is passed to the database helper here.
        source = GetDatabaseInfoScholarshipsWithClassStatuses()
        descriptions = source.getConcatenatedDescriptionsEligibilities()
        scholarshipIds = source.getScholarshipsWithClassStatusIdsList()

        ensemble = LogisticRegressionMultilabelClassifyClassStatusFromPretrainedModels(
            descriptions, scholarshipIds)
        ensemble.displayResults()
    def test_makeTrainingTestingSetBadPercentage(self):
        """The data-set builder must return None when asked for a training percentage of 2."""
        status = 'Senior'
        invalidPercentage = 2

        withStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=status)
        withoutStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=status, useNot=True)

        withStatusIds = withStatusDB.getScholarshipsWithClassStatusIdsList()
        withStatusTexts = withStatusDB.getConcatenatedDescriptionsEligibilities()
        withoutStatusIds = withoutStatusDB.getScholarshipsWithClassStatusIdsList()
        withoutStatusTexts = withoutStatusDB.getConcatenatedDescriptionsEligibilities()

        # Assert directly on the builder's return value.
        self.assertIsNone(MakeDataSet.makeBinaryLabelTrainingAndTestingSet(
            firstLabel=status,
            secondLabel='Other',
            firstLabelTextList=withStatusTexts,
            secondLabelTextList=withoutStatusTexts,
            firstLabelIdsList=withStatusIds,
            secondLabelIdsList=withoutStatusIds,
            trainingPercentage=invalidPercentage))
Example #6
0
    def __init__(self,
                 classStatus,
                 trainingPercentage,
                 modelSaveFile=None,
                 featuresValuesCountsSaveFile=None):
        """Build the training/testing sets for one class status and set up an untrained classifier.

        classStatus: label passed to the database queries as ``requirementNeeded``
            and used as the first (positive) label of the data set.
        trainingPercentage: value forwarded to
            ``MakeDataSet.makeBinaryLabelTrainingAndTestingSet`` to size the training set.
        modelSaveFile: stored on the instance as ``modelSaveFile`` (not used here).
        featuresValuesCountsSaveFile: stored on the instance as
            ``featuresValueCountsSaveFile`` (not used here).

        Raises ValueError when the data-set builder returns None for the
        given ``trainingPercentage``.
        """
        self.classStatus = classStatus
        self.trainingPercentage = trainingPercentage
        self.badLabel = 'Other'
        self.modelSaveFile = modelSaveFile
        self.featuresValueCountsSaveFile = featuresValuesCountsSaveFile

        print('Creating datasets...')
        goodClassStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=self.classStatus)
        badClassStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(
            requirementNeeded=self.classStatus, useNot=True)

        goodClassStatusIds = goodClassStatusDB.getScholarshipsWithClassStatusIdsList(
        )
        goodClassStatusDataTextList = goodClassStatusDB.getConcatenatedDescriptionsEligibilities(
        )
        badClassStatusIds = badClassStatusDB.getScholarshipsWithClassStatusIdsList(
        )
        badClassStatusDataTextList = badClassStatusDB.getConcatenatedDescriptionsEligibilities(
        )

        # Honour the caller's trainingPercentage; it used to be stored but
        # silently replaced by a hard-coded 0.8 in this call.
        trainingTestingList = MakeDataSet.makeBinaryLabelTrainingAndTestingSet(
            firstLabel=self.classStatus,
            secondLabel=self.badLabel,
            firstLabelTextList=goodClassStatusDataTextList,
            secondLabelTextList=badClassStatusDataTextList,
            firstLabelIdsList=goodClassStatusIds,
            secondLabelIdsList=badClassStatusIds,
            trainingPercentage=self.trainingPercentage)
        # The builder can return None (the bad-percentage unit test relies on
        # that); fail with a clear message rather than on the indexing below.
        if trainingTestingList is None:
            raise ValueError(
                'MakeDataSet.makeBinaryLabelTrainingAndTestingSet returned '
                'None for trainingPercentage=%r' % (self.trainingPercentage,))
        self.training = trainingTestingList[0]
        self.testing = trainingTestingList[1]

        # Called only after self.training / self.testing have been assigned.
        self.dataFrame = self.makeDataFrame()

        self.trainingVectors = []
        self.testingVectors = []

        self.featuresValueCountsIndexes = []
        self.logisticRegressionClassifier = LogisticRegression()
    def test_makeTrainingTestingSets(self):
        """Verify the training/testing split sizes for a 0.9 training percentage."""
        status = 'Junior'
        requestedShare = 0.9

        withStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=status)
        withoutStatusDB = GetDatabaseInfoScholarshipsWithClassStatuses(requirementNeeded=status, useNot=True)

        withStatusIds = withStatusDB.getScholarshipsWithClassStatusIdsList()
        withStatusTexts = withStatusDB.getConcatenatedDescriptionsEligibilities()
        withoutStatusIds = withoutStatusDB.getScholarshipsWithClassStatusIdsList()
        withoutStatusTexts = withoutStatusDB.getConcatenatedDescriptionsEligibilities()

        builderArguments = dict(
            firstLabel=status,
            secondLabel='Other',
            firstLabelTextList=withStatusTexts,
            secondLabelTextList=withoutStatusTexts,
            firstLabelIdsList=withStatusIds,
            secondLabelIdsList=withoutStatusIds,
            trainingPercentage=requestedShare,
        )
        bothSets = MakeDataSet.makeBinaryLabelTrainingAndTestingSet(**builderArguments)
        trainingPart = bothSets[0]
        testingPart = bothSets[1]
        overallSize = len(trainingPart) + len(testingPart)

        # Training part holds the requested share of all samples, rounded up.
        self.assertEqual(math.ceil(overallSize * requestedShare), len(trainingPart))
        # No input text is lost or duplicated by the split.
        inputSize = len(withStatusTexts) + len(withoutStatusTexts)
        self.assertEqual(inputSize, overallSize)
    def test_runModelOnFreshman(self):
        """Run the saved 'Freshman' logistic-regression model on the database texts and display the results.

        The test first confirms that both saved files (the trained model and the
        features-value-counts file) can be opened for binary reading, so a missing
        file fails here rather than inside the classifier.
        """
        import os  # local import: only needed to build the saved-file paths

        classStatus = 'Freshman'
        # os.path.join replaces the old '..\ClassifierTrainedModels\%s...' literals,
        # which relied on invalid escape sequences ('\C', '\%') and only worked
        # with the Windows separator. On Windows the resulting paths are unchanged.
        modelsDirectory = os.path.join('..', 'ClassifierTrainedModels')
        modelSaveFile = os.path.join(modelsDirectory, '%sClassStatusTrainedLRModel' % classStatus)
        featuresValueCountsSaveFile = os.path.join(
            modelsDirectory, '%sClassStatusTrainedFeaturesValueCounts' % classStatus)

        # test to make sure can open model and fvc save files; the context
        # manager closes each handle even if something goes wrong in between.
        for savedFile in (modelSaveFile, featuresValueCountsSaveFile):
            with open(savedFile, 'rb'):
                pass

        # now do test
        databaseInfo = GetDatabaseInfoScholarshipsWithClassStatuses()
        dataTextList = databaseInfo.getConcatenatedDescriptionsEligibilities()
        idsList = databaseInfo.getScholarshipsWithClassStatusIdsList()
        testClassify = LogisticRegressionClassifyClassStatusFromPretrainedModel(
            trainedModelInputFile=modelSaveFile,
            trainedFeaturesValueCountsIndexesFile=featuresValueCountsSaveFile,
            testingDataTextList=dataTextList,
            testingDataIdsList=idsList)
        testClassify.displayResults()
Example #9
0
    def test_RunEnsembleClassifierInsertResultsIntoDB(self):
        """Classify the database texts with the ensemble and write each prediction back to its row."""
        # Fetch the texts and their ids from the database.
        extractor = GetDatabaseInfoScholarshipsWithClassStatuses()
        descriptions = extractor.getConcatenatedDescriptionsEligibilities()
        rowIds = extractor.getScholarshipsWithClassStatusIdsList()

        # Run the ensemble; each result entry is indexed as [0] = id, [1] = predictions.
        ensemble = LogisticRegressionMultilabelClassifyClassStatusFromPretrainedModels(
            descriptions, rowIds)
        idPredictionPairs = ensemble.doAllClassificationsAndReturnFilteredPredictionsListsById()

        # Store one comma-separated prediction string per scholarship row.
        connection = SUDBConnect()
        predictedIds = [pair[0] for pair in idPredictionPairs]
        predictedLabels = [pair[1] for pair in idPredictionPairs]
        for rowId, labels in zip(predictedIds, predictedLabels):
            joinedLabels = ', '.join(labels)
            # NOTE(review): the statement is built by string concatenation; a
            # label containing a single quote would break it. Switch to a
            # parameterized query if SUDBConnect offers one.
            statement = (
                "update dbo.ScholarshipsWithClassStatuses set EnsembleClassifierPrediction='"
                + joinedLabels
                + "' where ScholarshipsWithClassStatusId = '"
                + str(rowId)
                + "'")
            connection.insertUpdateOrDeleteDB(statement)
Example #10
0
    def test_runModelOnFreshman(self):
        """Run the saved 'Freshman' logistic-regression model on the database texts and display the results.

        The test first confirms that both saved files (the trained model and the
        features-value-counts file) can be opened for binary reading, so a missing
        file fails here rather than inside the classifier.
        """
        import os  # local import: only needed to build the saved-file paths

        classStatus = 'Freshman'
        # os.path.join replaces the old '..\ClassifierTrainedModels\%s...' literals,
        # which relied on invalid escape sequences ('\C', '\%') and only worked
        # with the Windows separator. On Windows the resulting paths are unchanged.
        modelsDirectory = os.path.join('..', 'ClassifierTrainedModels')
        modelSaveFile = os.path.join(
            modelsDirectory, '%sClassStatusTrainedLRModel' % classStatus)
        featuresValueCountsSaveFile = os.path.join(
            modelsDirectory,
            '%sClassStatusTrainedFeaturesValueCounts' % classStatus)

        # test to make sure can open model and fvc save files; the context
        # manager closes each handle even if something goes wrong in between.
        for savedFile in (modelSaveFile, featuresValueCountsSaveFile):
            with open(savedFile, 'rb'):
                pass

        # now do test
        databaseInfo = GetDatabaseInfoScholarshipsWithClassStatuses()
        dataTextList = databaseInfo.getConcatenatedDescriptionsEligibilities()
        idsList = databaseInfo.getScholarshipsWithClassStatusIdsList()
        testClassify = LogisticRegressionClassifyClassStatusFromPretrainedModel(
            trainedModelInputFile=modelSaveFile,
            trainedFeaturesValueCountsIndexesFile=featuresValueCountsSaveFile,
            testingDataTextList=dataTextList,
            testingDataIdsList=idsList)
        testClassify.displayResults()