def testClassifyKeywordsAsExpected(self):
    """
    Tests ClassificationModelKeywords.

    Training on the first five samples of the dataset, and testing on the
    rest, the model's classifications should match those in the expected
    classes data file.
    """
    name = "Keywords"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="keywords_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=name,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(name)
    self.runExperiment(runner)

    expectedClasses, resultClasses = self.getExpectedClassifications(
        runner,
        os.path.join(DATA_DIR, "responses_expected_classes_keywords.csv"))

    # Samples whose third-place label is decided by a random tie-break.
    tieSamples = frozenset([7, 9, 12])
    for idx, (expected, actual) in enumerate(
            zip(expectedClasses, resultClasses)):
        if idx in tieSamples:
            # Ties amongst winning labels are handled randomly, which affects
            # the third classification in these test samples, so compare only
            # the top two labels here.
            expected, actual = expected[:2], actual[:2]
        self.assertEqual(
            sorted(expected), sorted(actual),
            "Keywords model predicted classes other than what we expect.")
def testClassifyEndpointAsExpected(self):
    """
    Tests ClassificationModelEndpoint.

    Training on the first five samples of the dataset, and testing on the
    rest, the model's classifications should match those in the expected
    classes data file.
    """
    modelName = "CioEndpoint"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="endpoint_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelName,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelName)
    self.runExperiment(runner)

    expectedClasses, resultClasses = self.getExpectedClassifications(
        runner,
        os.path.join(DATA_DIR, "responses_expected_classes_endpoint.csv"))

    # Use a plain for loop rather than a list comprehension: the original
    # comprehension was executed only for assertEqual's side effects and
    # built a throwaway list of Nones.
    for e, r in zip(expectedClasses, resultClasses):
        # Order of the predicted labels is irrelevant; compare as sorted lists.
        self.assertEqual(
            sorted(e), sorted(r),
            "Endpoint model predicted classes other than what we expect.")
def testClassifyEndpointAsExpected(self):
    """
    Tests ClassificationModelEndpoint.

    Training on the first five samples of the dataset, and testing on the
    rest, the model's classifications should match those in the expected
    classes data file.
    """
    # NOTE(review): this method appears to be defined twice in this file with
    # identical bodies; Python silently keeps only the later definition, so
    # one copy should probably be deleted — confirm and remove the duplicate.
    modelName = "CioEndpoint"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="endpoint_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelName,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelName)
    self.runExperiment(runner)

    expectedClasses, resultClasses = self.getExpectedClassifications(
        runner,
        os.path.join(DATA_DIR, "responses_expected_classes_endpoint.csv"))

    # A for loop replaces the original list comprehension, which was run
    # purely for assertEqual's side effects and allocated a useless list.
    for e, r in zip(expectedClasses, resultClasses):
        # Label order is irrelevant, so compare sorted copies.
        self.assertEqual(
            sorted(e), sorted(r),
            "Endpoint model predicted classes other than what we expect.")
def testClassifyKeywordsAsExpected(self):
    """
    Tests ClassificationModelKeywords.

    Training on the first five samples of the dataset, and testing on the
    rest, the model's classifications should match those in the expected
    classes data file.
    """
    modelType = "Keywords"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="keywords_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelType,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelType)
    self.runExperiment(runner)

    expectedPath = os.path.join(
        DATA_DIR, "responses_expected_classes_keywords.csv")
    expectedClasses, resultClasses = self.getExpectedClassifications(
        runner, expectedPath)

    for sampleIdx, pair in enumerate(zip(expectedClasses, resultClasses)):
        expected, result = pair
        # Ties amongst winning labels are handled randomly, which affects the
        # third classification in these test samples; only the first two
        # labels are deterministic there.
        if sampleIdx in (7, 9, 12):
            expected = expected[:2]
            result = result[:2]
        self.assertEqual(
            sorted(expected), sorted(result),
            "Keywords model predicted classes other than what we expect.")
def run(args):
    """
    Drive a full classification experiment from parsed command-line args.

    Steps: build the appropriate runner, read and preprocess the data,
    encode the samples, run the experiment, write out classifications,
    compute/evaluate results, save the model, and optionally validate
    against expected classifications. Timing for each phase is printed.

    :param args: argparse-style namespace; fields read here include
                 resultsDir, modelName, textPreprocess, writeEncodings,
                 seed, and validation. NOTE: args.resultsDir is mutated
                 in place to an absolute path.
    """
    start = time.time()
    # Anchor resultsDir relative to this script's directory, not the CWD.
    root = os.path.dirname(os.path.realpath(__file__))
    args.resultsDir = os.path.join(root, args.resultsDir)

    # HTMNetwork experiments use a dedicated runner; initModel(0) presumably
    # selects the first network config — TODO confirm against HTMRunner.
    if args.modelName == "HTMNetwork":
        runner = HTMRunner(**args.__dict__)
        runner.initModel(0)
    else:
        runner = Runner(**args.__dict__)
        runner.initModel(args.modelName)

    print "Reading in data and preprocessing."
    dataTime = time.time()
    runner.setupData(args.textPreprocess)
    print("Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding "
          "the data".format(time.time() - dataTime))

    encodeTime = time.time()
    runner.encodeSamples(args.writeEncodings)
    print("Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the "
          "experiment.".format(time.time() - encodeTime))

    runner.runExperiment(args.seed)
    runner.writeOutClassifications()

    resultCalcs = runner.calculateResults()
    runner.evaluateCumulativeResults(resultCalcs)

    print "Saving..."
    runner.saveModel()
    print "Experiment complete in {0:.2f} seconds.".format(time.time() - start)

    # Optional post-run check of predictions against a file of expected
    # classifications supplied on the command line.
    if args.validation:
        print "Validating experiment against expected classifications..."
        print runner.validateExperiment(args.validation)