def testClassifyKeywordsAsExpected(self):
    """
    Tests ClassificationModelKeywords.

    Training on the first five samples of the dataset, and testing on the rest,
    the model's classifications should match those in the expected classes
    data file.
    """
    modelName = "Keywords"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="keywords_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelName,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelName)
    self.runExperiment(runner)

    expectedClasses, resultClasses = self.getExpectedClassifications(
      runner, os.path.join(DATA_DIR, "responses_expected_classes_keywords.csv"))

    for i, (e, r) in enumerate(zip(expectedClasses, resultClasses)):
      if i in (7, 9, 12):
        # Ties amongst winning labels are handled randomly, which affects the
        # third classification in these test samples.
        e = e[:2]
        r = r[:2]
      self.assertEqual(sorted(e), sorted(r),
        "Keywords model predicted classes other than what we expect.")


def testClassifyEndpointAsExpected(self):
    """
    Tests ClassificationModelEndpoint.

    Training on the first five samples of the dataset, and testing on the rest,
    the model's classifications should match those in the expected classes
    data file.
    """
    modelName = "CioEndpoint"
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName="endpoint_test",
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelName,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelName)
    self.runExperiment(runner)

    expectedClasses, resultClasses = self.getExpectedClassifications(runner,
      os.path.join(DATA_DIR, "responses_expected_classes_endpoint.csv"))

    for e, r in zip(expectedClasses, resultClasses):
      self.assertEqual(sorted(e), sorted(r),
        "Endpoint model predicted classes other than what we expect.")
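Both tests above build an identical Runner except for the model name, the experiment name, and the expected-classes file. A minimal sketch of a shared helper that could factor out that duplication (the name `_makeRunner` is hypothetical, not part of the original suite):

def _makeRunner(self, modelName, experimentName):
    # Hypothetical helper: the configuration below is exactly the shared
    # setup used by both tests above; only the model and experiment names vary.
    runner = Runner(dataPath=os.path.join(DATA_DIR, "responses.csv"),
                    resultsDir="",
                    experimentName=experimentName,
                    experimentType="incremental",
                    loadPath=None,
                    modelName=modelName,
                    numClasses=3,
                    plots=0,
                    orderedSplit=True,
                    trainSizes=[5],
                    verbosity=0)
    runner.initModel(modelName)
    return runner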
import os
import time

# Runner (and HTMRunner, used for the "HTMNetwork" model) are assumed to be
# importable from the project's experiment-runner module.


def run(args):
    start = time.time()

    root = os.path.dirname(os.path.realpath(__file__))
    args.resultsDir = os.path.join(root, args.resultsDir)

    if args.modelName == "HTMNetwork":
        runner = HTMRunner(**args.__dict__)
        runner.initModel(0)
    else:
        runner = Runner(**args.__dict__)
        runner.initModel(args.modelName)

    print "Reading in data and preprocessing."
    dataTime = time.time()
    runner.setupData(args.textPreprocess)
    print(
        "Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding "
        "the data".format(time.time() - dataTime))

    encodeTime = time.time()
    runner.encodeSamples(args.writeEncodings)
    print(
        "Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the "
        "experiment.".format(time.time() - encodeTime))

    runner.runExperiment(args.seed)

    runner.writeOutClassifications()

    resultCalcs = runner.calculateResults()
    runner.evaluateCumulativeResults(resultCalcs)

    print "Saving..."
    runner.saveModel()

    print "Experiment complete in {0:.2f} seconds.".format(time.time() - start)

    if args.validation:
        print "Validating experiment against expected classifications..."
        print runner.validateExperiment(args.validation)
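run() hands args.__dict__ straight to the Runner constructor, so the namespace it receives must carry the Runner keyword arguments plus the extra attributes read above (textPreprocess, writeEncodings, seed, validation). A minimal driver sketch, with flag names inferred from those attributes (assumptions, not the original CLI):

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Run a classification experiment.")
    # These flags mirror the attributes run() reads; the real script likely
    # defines more, since everything in args.__dict__ reaches Runner().
    parser.add_argument("--dataPath", default="data/responses.csv")
    parser.add_argument("--resultsDir", default="results")
    parser.add_argument("--modelName", default="Keywords")
    parser.add_argument("--textPreprocess", action="store_true")
    parser.add_argument("--writeEncodings", action="store_true")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--validation", default=None)

    run(parser.parse_args())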