def countMismatch(self, keyword, peptidesToRemoveCount):
    """Benchmark before and after removing peptides whose status is keyword.

    Runs the SVM benchmark on every peptide from the reader, pops peptides
    whose "status" attribute output string equals ``keyword`` out of their
    protein's ``peptides`` mapping, runs the benchmark again on what is
    left, and returns a TestSetResultTracker holding both results (the
    pre-removal result first).

    NOTE(review): removal stops only once the running count *exceeds*
    ``peptidesToRemoveCount``, so up to ``peptidesToRemoveCount + 1``
    peptides are popped -- confirm this is intended.
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False

    allPeptides = pcssTools.getAllPeptides(self.reader.getProteins(), False)
    tracker = pcssSvm.TestSetResultTracker(self.runner)
    resultBefore = self.getSvmBenchmarkTestSetResult(benchmarker, allPeptides)

    removed = 0
    for protein in self.reader.getProteins():
        peptideMap = protein.peptides
        # Iterate over a snapshot because entries are popped inside the loop.
        for position, peptide in list(peptideMap.iteritems()):
            if peptide.getAttributeOutputString("status") != keyword:
                continue
            peptideMap.pop(position)
            removed += 1
            if removed > peptidesToRemoveCount:
                break
        # The inner break only leaves the current protein; stop the scan too.
        if removed > peptidesToRemoveCount:
            break

    remainingPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)
    resultAfter = self.getSvmBenchmarkTestSetResult(
        benchmarker, remainingPeptides)

    tracker.addTestSetResult(resultBefore)
    tracker.addTestSetResult(resultAfter)
    return tracker
def countMismatch(self, keyword, peptidesToRemoveCount):
    """Return a tracker with benchmark results before and after a removal.

    The first result comes from benchmarking all peptides from the reader.
    Peptides whose "status" attribute output string equals ``keyword`` are
    then popped from each protein's ``peptides`` mapping and the benchmark
    is run a second time on the remaining peptides.

    NOTE(review): the loop halts when the number removed is strictly
    greater than ``peptidesToRemoveCount``, i.e. it can pop one more
    peptide than that value -- verify this against the callers.
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False

    initialPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)
    resultTracker = pcssSvm.TestSetResultTracker(self.runner)
    initialResult = self.getSvmBenchmarkTestSetResult(
        benchmarker, initialPeptides)

    removedCount = 0
    for protein in self.reader.getProteins():
        proteinPeptides = protein.peptides
        # Snapshot the items first: the mapping is modified while scanning.
        snapshot = list(proteinPeptides.iteritems())
        for peptidePosition, peptide in snapshot:
            status = peptide.getAttributeOutputString("status")
            if status == keyword:
                proteinPeptides.pop(peptidePosition)
                removedCount += 1
                if removedCount > peptidesToRemoveCount:
                    break
        if removedCount > peptidesToRemoveCount:
            break

    reducedPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)
    reducedResult = self.getSvmBenchmarkTestSetResult(
        benchmarker, reducedPeptides)

    for result in (initialResult, reducedResult):
        resultTracker.addTestSetResult(result)
    return resultTracker
def test_leave_one_out_test_set_error(self):
    """Expect PcssGlobalException after the benchmarker's testSvm is swapped.

    Training and test sets are created from the [0:20] slice of all
    peptides; ``benchmarker.testSvm`` is then replaced with a TestSvm that
    was given the [21:23] slice before ``trainAndApplyModel`` is called.
    """
    benchmarker = self.getLeaveOneOutBenchmarker()
    trainingSlice = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)[0:20]
    benchmarker.createTrainingAndTestSets(trainingSlice)

    # Build a test SVM holding peptides outside the slice used above.
    mismatchedPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)[21:23]
    replacementSvm = pcssSvm.TestSvm(self.runner)
    replacementSvm.setPeptides(mismatchedPeptides)
    benchmarker.testSvm = replacementSvm

    with self.assertRaises(pcssErrors.PcssGlobalException) as pge:
        benchmarker.trainAndApplyModel()
    self.handleTestException(pge)
def test_leave_one_out_test_set_error(self):
    """Check that a substituted testSvm makes trainAndApplyModel raise.

    The benchmarker builds its sets from peptides [0:20]; its ``testSvm``
    attribute is then overwritten with a TestSvm loaded with peptides
    [21:23], and ``trainAndApplyModel`` must raise PcssGlobalException.
    """
    benchmarker = self.getLeaveOneOutBenchmarker()

    firstTwenty = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)[0:20]
    benchmarker.createTrainingAndTestSets(firstTwenty)

    substituteSvm = pcssSvm.TestSvm(self.runner)
    otherPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)[21:23]
    substituteSvm.setPeptides(otherPeptides)
    benchmarker.testSvm = substituteSvm

    expectedError = pcssErrors.PcssGlobalException
    with self.assertRaises(expectedError) as pge:
        benchmarker.trainAndApplyModel()
    self.handleTestException(pge)
def test_leave_one_out_internal_count_error(self):
    """Expect PcssGlobalException from throwBadCountError.

    Passes a leave-one-out benchmarker and the [0:20] slice of all peptides
    to ``self.throwBadCountError`` and hands the captured exception context
    to ``self.handleTestException``.
    """
    benchmarker = self.getLeaveOneOutBenchmarker()
    everyPeptide = pcssTools.getAllPeptides(self.reader.getProteins(), False)
    peptideSlice = everyPeptide[0:20]

    with self.assertRaises(pcssErrors.PcssGlobalException) as pge:
        self.throwBadCountError(benchmarker, peptideSlice)
    self.handleTestException(pge)
def test_more_positives_than_negatives(self):
    """Expect PcssGlobalException for the trainingMorePositives.txt input.

    Reads the error input file "trainingMorePositives.txt" and checks that
    ``SvmBenchmarker.createTrainingAndTestSets`` raises on its peptides.
    """
    errorInput = self.getErrorInputFile("trainingMorePositives.txt")
    self.readTrainingAnnotationInputFile(errorInput)

    loadedPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)
    benchmarker = pcssSvm.SvmBenchmarker(self.runner)

    with self.assertRaises(pcssErrors.PcssGlobalException) as pge:
        benchmarker.createTrainingAndTestSets(loadedPeptides)
    self.handleTestException(pge)
def test_train_and_test_svm(self):
    """Check the first benchmark tuple of a non-random SVM benchmark run.

    With "make_random_test_set" disabled in the runner's internal config,
    benchmarks all peptides from the reader and compares the score and fpr
    of benchmark tuple 0 against fixed expected values.
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False

    peptides = pcssTools.getAllPeptides(self.reader.getProteins(), False)
    pstList = self.getSvmBenchmarkTestSetResult(benchmarker, peptides)
    firstTuple = pstList.getBenchmarkTuple(0)

    # assertEqual replaces the deprecated assertEquals alias; the comparison
    # itself (exact float equality) is unchanged.
    self.assertEqual(float(firstTuple.score), -3.9027991)
    self.assertEqual(float(firstTuple.fpr), 0.0136986301369863)
def test_more_positives_than_negatives(self):
    """Check that trainingMorePositives.txt makes set creation raise.

    After loading the error input file "trainingMorePositives.txt", a fresh
    SvmBenchmarker must raise PcssGlobalException from
    ``createTrainingAndTestSets``.
    """
    self.readTrainingAnnotationInputFile(
        self.getErrorInputFile("trainingMorePositives.txt"))

    proteins = self.reader.getProteins()
    peptides = pcssTools.getAllPeptides(proteins, False)
    benchmarker = pcssSvm.SvmBenchmarker(self.runner)

    expectedError = pcssErrors.PcssGlobalException
    with self.assertRaises(expectedError) as pge:
        benchmarker.createTrainingAndTestSets(peptides)
    self.handleTestException(pge)
def test_train_svm(self):
    """Check training/test set sizes produced by createTrainingAndTestSets.

    Builds the sets from all peptides supplied by the reader and compares
    the lengths of the four sets on ``benchmarker.benchmarkHandler``
    against fixed expected counts.
    """
    benchmarker = self.getSvmBenchmarker()
    benchmarker.createTrainingAndTestSets(
        pcssTools.getAllPeptides(self.reader.getProteins(), False))

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        len(benchmarker.benchmarkHandler.positiveTrainingSet), 61)
    self.assertEqual(
        len(benchmarker.benchmarkHandler.negativeTrainingSet), 61)
    self.assertEqual(len(benchmarker.benchmarkHandler.positiveTestSet), 6)
    self.assertEqual(len(benchmarker.benchmarkHandler.negativeTestSet), 73)
def test_train_and_test_svm(self):
    """Verify score and fpr of benchmark tuple 0 for a non-random test set.

    Sets "make_random_test_set" to False in the runner's internal config,
    runs the SVM benchmark over all peptides from the reader, and compares
    the first benchmark tuple's score and fpr with fixed expected values.
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False

    pstList = self.getSvmBenchmarkTestSetResult(
        benchmarker,
        pcssTools.getAllPeptides(self.reader.getProteins(), False))
    firstTuple = pstList.getBenchmarkTuple(0)

    # Use assertEqual: assertEquals is a deprecated alias of the same check.
    self.assertEqual(float(firstTuple.score), -3.9027991)
    self.assertEqual(float(firstTuple.fpr), 0.0136986301369863)
def test_leave_one_out(self):
    """Run the leave-one-out benchmarker and compare its result file.

    Repeats create/train/read once per element of the [0:20] peptide slice,
    then processes all results and compares the leave-one-out result file
    with the expected output named "leaveOneOut".

    NOTE(review): ``processAllResults`` is placed after the loop here --
    confirm against the original layout.
    """
    benchmarker = self.getLeaveOneOutBenchmarker()
    peptideSlice = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)[0:20]

    # The index is not used; only the number of rounds matters.
    for _ in range(len(peptideSlice)):
        benchmarker.createTrainingAndTestSets(peptideSlice)
        benchmarker.trainAndApplyModel()
        benchmarker.readBenchmarkResults()
    benchmarker.processAllResults()

    resultFile = self.runner.pdh.getLeaveOneOutResultFileName()
    self.compareToExpectedOutput(resultFile, "leaveOneOut")
def test_bad_status(self):
    """Expect PcssGlobalException when a peptide has status "fake".

    Loads the standard training annotation input, sets the "status" string
    attribute of the first peptide of the first protein to "fake", and
    checks that ``TrainingSvm.setPeptides`` raises.
    """
    self.readStandardTrainingAnnotationInputFile()

    firstProtein = self.reader.getProteins()[0]
    firstPeptide = firstProtein.peptides.values()[0]
    firstPeptide.addStringAttribute("status", "fake")

    trainingSvm = pcssSvm.TrainingSvm(self.runner)
    with self.assertRaises(pcssErrors.PcssGlobalException) as pge:
        trainingSvm.setPeptides(
            pcssTools.getAllPeptides(self.reader.getProteins(), False))
    self.handleTestException(pge)
def test_bad_status(self):
    """Check that a peptide with status "fake" makes setPeptides raise.

    The first peptide of the first protein from the standard training
    annotation input gets its "status" string attribute set to "fake";
    ``TrainingSvm.setPeptides`` must then raise PcssGlobalException.
    """
    self.readStandardTrainingAnnotationInputFile()

    proteins = self.reader.getProteins()
    tamperedPeptide = proteins[0].peptides.values()[0]
    tamperedPeptide.addStringAttribute("status", "fake")

    svm = pcssSvm.TrainingSvm(self.runner)
    expectedError = pcssErrors.PcssGlobalException
    with self.assertRaises(expectedError) as pge:
        svm.setPeptides(
            pcssTools.getAllPeptides(self.reader.getProteins(), False))
    self.handleTestException(pge)
def test_no_test_set_positives(self):
    """Expect PcssGlobalException for the trainingNoTestSetPositives input.

    Reading "trainingNoTestSetPositives.txt" may itself raise
    PcssGlobalException; in that case its message is printed and the test
    continues. ``createTrainingAndTestSets`` must then raise on the
    peptides currently held by the reader.
    """
    errorInput = self.getErrorInputFile("trainingNoTestSetPositives.txt")
    try:
        self.readTrainingAnnotationInputFile(errorInput)
    except pcssErrors.PcssGlobalException as e:
        # A failed read is reported but does not abort the test.
        print(e.msg)

    loadedPeptides = pcssTools.getAllPeptides(
        self.reader.getProteins(), False)
    benchmarker = pcssSvm.SvmBenchmarker(self.runner)

    with self.assertRaises(pcssErrors.PcssGlobalException) as pge:
        benchmarker.createTrainingAndTestSets(loadedPeptides)
    self.handleTestException(pge)
def test_leave_one_out(self):
    """Exercise the leave-one-out benchmarker on the [0:20] peptide slice.

    For as many rounds as the slice has elements, the benchmarker creates
    its sets, trains and applies the model, and reads the benchmark
    results. All results are then processed and the leave-one-out result
    file is compared with the expected output "leaveOneOut".

    NOTE(review): ``processAllResults`` is assumed to run once, after the
    loop -- confirm against the original layout.
    """
    benchmarker = self.getLeaveOneOutBenchmarker()
    everyPeptide = pcssTools.getAllPeptides(self.reader.getProteins(), False)
    peptideSet = everyPeptide[0:20]

    rounds = len(peptideSet)
    for _ in range(rounds):
        benchmarker.createTrainingAndTestSets(peptideSet)
        benchmarker.trainAndApplyModel()
        benchmarker.readBenchmarkResults()
    benchmarker.processAllResults()

    self.compareToExpectedOutput(
        self.runner.pdh.getLeaveOneOutResultFileName(), "leaveOneOut")
def test_no_test_set_positives(self):
    """Check that trainingNoTestSetPositives.txt makes set creation raise.

    A PcssGlobalException raised while reading the input file is caught
    and its message printed; afterwards a fresh SvmBenchmarker must raise
    PcssGlobalException from ``createTrainingAndTestSets``.
    """
    try:
        self.readTrainingAnnotationInputFile(
            self.getErrorInputFile("trainingNoTestSetPositives.txt"))
    except pcssErrors.PcssGlobalException as e:
        print(e.msg)

    proteins = self.reader.getProteins()
    peptides = pcssTools.getAllPeptides(proteins, False)
    benchmarker = pcssSvm.SvmBenchmarker(self.runner)

    expectedError = pcssErrors.PcssGlobalException
    with self.assertRaises(expectedError) as pge:
        benchmarker.createTrainingAndTestSets(peptides)
    self.handleTestException(pge)
def test_train_svm(self):
    """Verify the sizes of the sets built by createTrainingAndTestSets.

    The benchmarker builds its sets from every peptide supplied by the
    reader; the lengths of the positive/negative training and test sets on
    ``benchmarker.benchmarkHandler`` are compared with fixed counts.
    """
    benchmarker = self.getSvmBenchmarker()
    peptides = pcssTools.getAllPeptides(self.reader.getProteins(), False)
    benchmarker.createTrainingAndTestSets(peptides)

    # assertEqual is used in place of the deprecated assertEquals alias.
    self.assertEqual(
        len(benchmarker.benchmarkHandler.positiveTrainingSet), 61)
    self.assertEqual(
        len(benchmarker.benchmarkHandler.negativeTrainingSet), 61)
    self.assertEqual(len(benchmarker.benchmarkHandler.positiveTestSet), 6)
    self.assertEqual(len(benchmarker.benchmarkHandler.negativeTestSet), 73)
def test_multiple_iteration_normal_output(self):
    """Compare the result file of two tracked benchmark runs.

    With "make_random_test_set" disabled, benchmarks the peptide list once
    as returned and once after reversing it in place, adds both results to
    a TestSetResultTracker, finalizes it, writes the full benchmark result
    file, and compares that file with the expected output "trainingSvm".
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False
    tracker = pcssSvm.TestSetResultTracker(self.runner)
    peptides = pcssTools.getAllPeptides(self.reader.getProteins(), False)

    # First pass: original order.
    tracker.addTestSetResult(
        self.getSvmBenchmarkTestSetResult(benchmarker, peptides))

    # Second pass: same list object, reversed in place.
    peptides.reverse()
    tracker.addTestSetResult(
        self.getSvmBenchmarkTestSetResult(benchmarker, peptides))

    tracker.finalize()
    tracker.writeResultFile(self.runner.pdh.getFullBenchmarkResultFileName())
    self.compareToExpectedOutput(
        self.runner.pdh.getFullBenchmarkResultFileName(), "trainingSvm")
def test_tracker_average(self):
    """Check that the tracker's first fpr equals the mean of the inputs.

    Produces two benchmark results, records the fpr of the first
    incremented-TPR tuple of each while adding them to a
    TestSetResultTracker, and asserts that the finalized tracker's first
    benchmark tuple has an fpr almost equal to the mean of those values.
    """
    benchmarker = self.getSvmBenchmarker()
    results = [
        self.getSvmBenchmarkTestSetResult(
            benchmarker,
            pcssTools.getAllPeptides(self.reader.getProteins(), False))
        for _ in range(2)
    ]

    firstFprValues = []
    tracker = pcssSvm.TestSetResultTracker(self.runner)
    for result in results:
        firstFprValues.append(result.getIncrementedTprTuples()[0].fpr)
        tracker.addTestSetResult(result)

    expectedMean = float(sum(firstFprValues)) / float(len(firstFprValues))
    tracker.finalize()
    self.assertAlmostEqual(expectedMean, tracker.getBenchmarkTuple(0).fpr)
def test_multiple_iteration_normal_output(self):
    """Write and check the tracker output for two benchmark iterations.

    "make_random_test_set" is set to False; the peptide list is benchmarked,
    reversed in place, and benchmarked again. Both results go into a
    TestSetResultTracker, which is finalized and written to the full
    benchmark result file; that file is compared with the expected output
    "trainingSvm".
    """
    benchmarker = self.getSvmBenchmarker()
    self.runner.internalConfig["make_random_test_set"] = False
    resultTracker = pcssSvm.TestSetResultTracker(self.runner)
    peptideList = pcssTools.getAllPeptides(self.reader.getProteins(), False)

    forwardResult = self.getSvmBenchmarkTestSetResult(
        benchmarker, peptideList)
    resultTracker.addTestSetResult(forwardResult)

    peptideList.reverse()
    reversedResult = self.getSvmBenchmarkTestSetResult(
        benchmarker, peptideList)
    resultTracker.addTestSetResult(reversedResult)

    resultTracker.finalize()
    resultTracker.writeResultFile(
        self.runner.pdh.getFullBenchmarkResultFileName())
    self.compareToExpectedOutput(
        self.runner.pdh.getFullBenchmarkResultFileName(), "trainingSvm")
def test_tracker_average(self):
    """Verify the finalized tracker averages the first fpr of its results.

    Two benchmark results are generated and added to a
    TestSetResultTracker; the fpr of each result's first incremented-TPR
    tuple is collected, and their mean must be almost equal to the fpr of
    the tracker's benchmark tuple 0 after ``finalize``.
    """
    benchmarker = self.getSvmBenchmarker()

    testSetResults = []
    for _ in range(2):
        peptides = pcssTools.getAllPeptides(self.reader.getProteins(), False)
        testSetResults.append(
            self.getSvmBenchmarkTestSetResult(benchmarker, peptides))

    collectedFprs = []
    resultTracker = pcssSvm.TestSetResultTracker(self.runner)
    for testSetResult in testSetResults:
        incrementedTuples = testSetResult.getIncrementedTprTuples()
        collectedFprs.append(incrementedTuples[0].fpr)
        resultTracker.addTestSetResult(testSetResult)

    total = float(sum(collectedFprs))
    count = float(len(collectedFprs))
    average = total / count

    resultTracker.finalize()
    trackedTuple = resultTracker.getBenchmarkTuple(0)
    self.assertAlmostEqual(average, trackedTuple.fpr)
def getAllPeptides(self, proteins):
    """Return ``pcssTools.getAllPeptides(proteins, False)``."""
    peptides = pcssTools.getAllPeptides(proteins, False)
    return peptides