def _createRunner(args, resultsDir):
  """Build the experiment runner selected by ``args.modelName``.

  @param args        Parsed options; the constructor arguments are read from
                     its attributes.
  @param resultsDir  (str) Directory path handed to the runner for results.

  @return An ``HTMRunner`` when ``args.modelName`` is "HTMNetwork", otherwise
          a ``Runner`` on which ``initModel()`` has already been called.
  """
  # Keyword arguments common to both runner classes.
  sharedKwargs = dict(dataPath=args.dataPath,
                      resultsDir=resultsDir,
                      experimentName=args.experimentName,
                      loadPath=args.loadPath,
                      modelName=args.modelName,
                      numClasses=args.numClasses,
                      plots=args.plots,
                      orderedSplit=args.orderedSplit,
                      trainSizes=[],
                      verbosity=args.verbosity)

  if args.modelName == "HTMNetwork":
    # HTM network data isn't ready yet to initialize the model, so
    # initModel() is intentionally not called on this path.
    return HTMRunner(networkConfigPath=args.networkConfigPath,
                     generateData=args.generateData,
                     votingMethod=args.votingMethod,
                     classificationFile=args.classificationFile,
                     classifierType=args.classifierType,
                     **sharedKwargs)

  runner = Runner(**sharedKwargs)
  runner.initModel(args.modelName)
  return runner


def run(args):
  """Run a k-folds cross-validation experiment end to end.

  Steps, in order: build the runner, set up and preprocess the data, split
  the sample indices into k-fold partitions, encode the samples, run the
  experiment, calculate and evaluate the results, save the model, and
  optionally validate against expected classifications.

  @param args  Parsed options. ``kFolds`` must be an int >= 1;
               ``resultsDir`` is joined onto the directory containing this
               file; ``orderedSplit`` disables randomized partitioning;
               ``validation``, when truthy, is passed to
               ``runner.validateExperiment()`` and the result is printed.

  @raise ValueError  If ``args.kFolds`` is not an int or is less than 1.
  """
  start = time.time()

  if (not isinstance(args.kFolds, int)) or (args.kFolds < 1):
    raise ValueError("Invalid value for number of cross-validation folds.")

  root = os.path.dirname(os.path.realpath(__file__))
  resultsDir = os.path.join(root, args.resultsDir)

  runner = _createRunner(args, resultsDir)

  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print("Reading in data and preprocessing.")
  dataTime = time.time()
  runner.setupData(args.textPreprocess)

  # TODO: move kfolds splitting to Runner
  randomize = not args.orderedSplit
  runner.partitions = KFolds(args.kFolds).split(
    range(len(runner.samples)), randomize=randomize)
  # The first element of each partition is measured as the training set size.
  runner.trainSizes = [len(x[0]) for x in runner.partitions]
  print("Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding "
        "the data".format(time.time() - dataTime))

  encodeTime = time.time()
  runner.encodeSamples()
  print("Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the "
        "experiment.".format(time.time() - encodeTime))

  runner.runExperiment()
  print("Experiment complete in {0:.2f} seconds.".format(time.time() - start))

  resultCalcs = runner.calculateResults()
  runner.evaluateCumulativeResults(resultCalcs)

  print("Saving...")
  runner.saveModel()

  if args.validation:
    print("Validating experiment against expected classifications...")
    print(runner.validateExperiment(args.validation))
# Example no. 2
# 0
def _createRunner(args, resultsDir):
    """Build the experiment runner selected by ``args.modelName``.

    @param args        Parsed options; the constructor arguments are read
                       from its attributes.
    @param resultsDir  (str) Directory path handed to the runner for results.

    @return An ``HTMRunner`` when ``args.modelName`` is "HTMNetwork",
            otherwise a ``Runner`` on which ``initModel()`` has already been
            called.
    """
    # Keyword arguments common to both runner classes.
    sharedKwargs = dict(dataPath=args.dataPath,
                        resultsDir=resultsDir,
                        experimentName=args.experimentName,
                        loadPath=args.loadPath,
                        modelName=args.modelName,
                        numClasses=args.numClasses,
                        plots=args.plots,
                        orderedSplit=args.orderedSplit,
                        trainSizes=[],
                        verbosity=args.verbosity)

    if args.modelName == "HTMNetwork":
        # HTM network data isn't ready yet to initialize the model, so
        # initModel() is intentionally not called on this path.
        return HTMRunner(networkConfigPath=args.networkConfigPath,
                         generateData=args.generateData,
                         votingMethod=args.votingMethod,
                         classificationFile=args.classificationFile,
                         classifierType=args.classifierType,
                         **sharedKwargs)

    runner = Runner(**sharedKwargs)
    runner.initModel(args.modelName)
    return runner


def run(args):
    """Run a k-folds cross-validation experiment end to end.

    Steps, in order: build the runner, set up and preprocess the data, split
    the sample indices into k-fold partitions, encode the samples, run the
    experiment, calculate and evaluate the results, save the model, and
    optionally validate against expected classifications.

    @param args  Parsed options. ``kFolds`` must be an int >= 1;
                 ``resultsDir`` is joined onto the directory containing this
                 file; ``orderedSplit`` disables randomized partitioning;
                 ``validation``, when truthy, is passed to
                 ``runner.validateExperiment()`` and the result is printed.

    @raise ValueError  If ``args.kFolds`` is not an int or is less than 1.
    """
    start = time.time()

    if (not isinstance(args.kFolds, int)) or (args.kFolds < 1):
        raise ValueError("Invalid value for number of cross-validation folds.")

    root = os.path.dirname(os.path.realpath(__file__))
    resultsDir = os.path.join(root, args.resultsDir)

    runner = _createRunner(args, resultsDir)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Reading in data and preprocessing.")
    dataTime = time.time()
    runner.setupData(args.textPreprocess)

    # TODO: move kfolds splitting to Runner
    randomize = not args.orderedSplit
    runner.partitions = KFolds(args.kFolds).split(range(len(runner.samples)),
                                                  randomize=randomize)
    # The first element of each partition is measured as the training set size.
    runner.trainSizes = [len(x[0]) for x in runner.partitions]
    print(
        "Data setup complete; elapsed time is {0:.2f} seconds.\nNow encoding "
        "the data".format(time.time() - dataTime))

    encodeTime = time.time()
    runner.encodeSamples()
    print(
        "Encoding complete; elapsed time is {0:.2f} seconds.\nNow running the "
        "experiment.".format(time.time() - encodeTime))

    runner.runExperiment()
    print("Experiment complete in {0:.2f} seconds.".format(
        time.time() - start))

    resultCalcs = runner.calculateResults()
    runner.evaluateCumulativeResults(resultCalcs)

    print("Saving...")
    runner.saveModel()

    if args.validation:
        print("Validating experiment against expected classifications...")
        print(runner.validateExperiment(args.validation))