コード例 #1
0
ファイル: imbu.py プロジェクト: andrewmalta13/nupic.research
  def createModel(self, modelName, loadPath, savePath, *modelFactoryArgs,
      **modelFactoryKwargs):
    """ Load a previously saved model, or build and train a new one.

    @param modelName Model identifier; it is passed through
        self._mapModelName() and the result is looked up on
        ClassificationModelTypes.
    @param loadPath Location handed to ClassificationModel.load(). When truthy,
        a failed load is an error; when falsy, a failed load falls back to
        creating a new model.
    @param savePath Passed to self._modelFactory() and to self.train() when a
        new model has to be created.
    @param modelFactoryArgs Extra positional arguments for self._modelFactory().
    @param modelFactoryKwargs Extra keyword arguments for self._modelFactory().

    @return The loaded model, or the newly created and trained model.

    @raise ImbuUnableToLoadModelError when loadPath is truthy and
        ClassificationModel.load() raises IOError.
    """
    # The result is not used; the lookup only validates the name, since getattr
    # raises AttributeError when the mapped name is not defined on
    # ClassificationModelTypes.
    getattr(ClassificationModelTypes, self._mapModelName(modelName))

    loadIsMandatory = bool(loadPath)

    try:
      return ClassificationModel.load(loadPath)
    except IOError as loadError:
      if loadIsMandatory:
        # The caller asked for a specific saved model; do NOT silently replace
        # it with a fresh one.
        raise ImbuUnableToLoadModelError(loadError)

      # NOTE(review): this path attempted the load with a falsy loadPath and
      # relies on that raising IOError -- confirm against
      # ClassificationModel.load().
      freshModel = self._modelFactory(modelName,
                                      savePath,
                                      *modelFactoryArgs,
                                      **modelFactoryKwargs)
      self.train(freshModel, savePath)
      return freshModel
コード例 #2
0
ファイル: imbu.py プロジェクト: weaver-viii/nupic.research
    def createModel(self, modelName, loadPath, savePath, *modelFactoryArgs,
                    **modelFactoryKwargs):
        """ Load a previously saved model, or build and train a new one.

        @param modelName Model identifier; it is passed through
            self._mapModelName() and the result is looked up on
            ClassificationModelTypes.
        @param loadPath Location handed to ClassificationModel.load(). When
            truthy, a failed load is an error; when falsy, a failed load falls
            back to creating a new model.
        @param savePath Passed to self._modelFactory() and to self.train() when
            a new model has to be created.
        @param modelFactoryArgs Extra positional arguments for
            self._modelFactory().
        @param modelFactoryKwargs Extra keyword arguments for
            self._modelFactory().

        @return The loaded model, or the newly created and trained model.

        @raise ImbuUnableToLoadModelError when loadPath is truthy and
            ClassificationModel.load() raises IOError.
        """
        # The result is not used; the lookup only validates the name, since
        # getattr raises AttributeError when the mapped name is not defined on
        # ClassificationModelTypes.
        getattr(ClassificationModelTypes, self._mapModelName(modelName))

        loadIsMandatory = bool(loadPath)

        try:
            return ClassificationModel.load(loadPath)
        except IOError as loadError:
            if loadIsMandatory:
                # The caller asked for a specific saved model; do NOT silently
                # replace it with a fresh one.
                raise ImbuUnableToLoadModelError(loadError)

            # NOTE(review): this path attempted the load with a falsy loadPath
            # and relies on that raising IOError -- confirm against
            # ClassificationModel.load().
            freshModel = self._modelFactory(modelName, savePath,
                                            *modelFactoryArgs,
                                            **modelFactoryKwargs)
            self.train(freshModel, savePath)
            return freshModel
コード例 #3
0
def runExperiment(args):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

    (dataSet, labelRefs, documentCategoryMap,
     documentTextMap) = readDataAndReshuffle(args)

    # Train only with documents whose id's are divisible by 100
    trainingData = [x for i, x in enumerate(dataSet) if x[2] % 100 == 0]
    testData = [x for i, x in enumerate(dataSet) if x[2] % 100 != 0]

    print "Num training", len(trainingData), "num testing", len(testData)

    # Create model
    model = instantiateModel(args)

    model = trainModel(args, model, trainingData, labelRefs)
    model.save(args.modelDir)
    newmodel = ClassificationModel.load(args.modelDir)
    testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap)
    testModel(args, newmodel, testData, labelRefs, documentCategoryMap)

    return model
コード例 #4
0
def runExperiment(args):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """
    # Create model
    model = instantiateModel(args)

    # Train model on the first 80% of the dataset
    trainingSplit = int(len(_DATASET) * 0.80)
    model = trainModel(model, _DATASET[:trainingSplit])

    # Test model on the full dataset
    accuracyPct = testModel(model, _DATASET)

    # Validate serialization - testing after reloading should give same result
    model.save(args.modelDir)
    newModel = ClassificationModel.load(args.modelDir)
    print
    print "Testing serialization..."
    newAccuracyPct = testModel(newModel, _DATASET)
    if accuracyPct == newAccuracyPct:
        print "Serialization validated."
    else:
        print (
            "Inconsistent results before ({}) and after ({}) saving/loading "
            "the model!".format(accuracyPct, newAccuracyPct)
        )
コード例 #5
0
def runExperiment(args):
  """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

  (dataSet, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args)

  # Train only with documents whose id's are divisible by 100
  trainingData = [x for i,x in enumerate(dataSet) if x[2]%100==0]
  testData = [x for i,x in enumerate(dataSet) if x[2]%100!=0]

  print "Num training",len(trainingData),"num testing",len(testData)

  # Create model
  model = instantiateModel(args)

  model = trainModel(args, model, trainingData, labelRefs)
  model.save(args.modelDir)
  newmodel = ClassificationModel.load(args.modelDir)
  testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap)
  testModel(args, newmodel, testData, labelRefs, documentCategoryMap)

  return model
コード例 #6
0
 def _executeModelLifecycle(self, modelName, modelDir):
   """ Create, train, save and reload a model; return the reloaded instance.

   @param modelName Passed to createModel() together with self.modelParams as
       keyword arguments.
   @param modelDir Location the trained model is saved to and then loaded
       from.

   @return The model returned by ClassificationModel.load(modelDir).
   """
   trainedModel = trainModel(createModel(modelName, **self.modelParams),
                             self.dataSet)
   trainedModel.save(modelDir)

   # Drop the local reference to the in-memory model before loading the saved
   # copy.
   del trainedModel

   reloadedModel = ClassificationModel.load(modelDir)
   return reloadedModel
コード例 #7
0
def runExperiment(args):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """
    # Create model
    model = instantiateModel(args)

    # Train model on the first 80% of the dataset
    trainingSplit = int(len(_DATASET) * 0.80)
    model = trainModel(model, _DATASET[:trainingSplit])

    # Test model on the full dataset
    accuracyPct = testModel(model, _DATASET)

    # Validate serialization - testing after reloading should give same result
    model.save(args.modelDir)
    newModel = ClassificationModel.load(args.modelDir)
    print
    print "Testing serialization..."
    newAccuracyPct = testModel(newModel, _DATASET)
    if accuracyPct == newAccuracyPct:
        print "Serialization validated."
    else:
        print(
            "Inconsistent results before ({}) and after ({}) saving/loading "
            "the model!".format(accuracyPct, newAccuracyPct))
コード例 #8
0
def executeModelLifecycle(args, trainingData, labelRefs):
  """ Run one model lifecycle: instantiate, train, save, then reload.

  @param args (argparse) Arguments used in classification model API
      experiments; args.verbosity is forwarded to trainModel() and
      args.modelDir is the save/load location.
  @param trainingData (dict) Keys are document numbers, values are three-tuples
      of the document (str), labels (list), and document ID (int).
  @param labelRefs (list) Label names (str) corresponding to label indices.

  @return (two-tuple) The trained in-memory model, followed by the copy
      reloaded from args.modelDir.
  """
  trainedModel = trainModel(instantiateModel(args), trainingData, labelRefs,
                            args.verbosity)
  trainedModel.save(args.modelDir)
  return trainedModel, ClassificationModel.load(args.modelDir)
コード例 #9
0
def executeModelLifecycle(args, trainingData, labelRefs):
    """ Run one model lifecycle: instantiate, train, save, then reload.

    @param args (argparse) Arguments used in classification model API
        experiments; args.verbosity is forwarded to trainModel() and
        args.modelDir is the save/load location.
    @param trainingData (dict) Keys are document numbers, values are
        three-tuples of the document (str), labels (list), and document ID
        (int).
    @param labelRefs (list) Label names (str) corresponding to label indices.

    @return (two-tuple) The trained in-memory model, followed by the copy
        reloaded from args.modelDir.
    """
    trainedModel = trainModel(instantiateModel(args), trainingData, labelRefs,
                              args.verbosity)
    trainedModel.save(args.modelDir)
    return trainedModel, ClassificationModel.load(args.modelDir)
コード例 #10
0
def runExperiment(args):
  """
  Load a previously saved model from args.modelDir and analyze it.

  Nothing is trained, saved or tested here: the data is read only to obtain
  the document text map that analyzeModel() takes alongside the loaded model.

  @param args Experiment arguments; passed to readDataAndReshuffle() and
      analyzeModel(), and args.modelDir is where the model is loaded from.

  @return The loaded model.
  """
  # Second argument of readDataAndReshuffle(); its meaning is defined by that
  # helper.
  indexList = [8,9,10,5,6,11,13,0,1,2,3,4,7,12,14]

  # Only the fourth returned item is used below.
  _, _, _, documentTextMap = readDataAndReshuffle(args, indexList)

  loadedModel = ClassificationModel.load(args.modelDir)
  analyzeModel(args, loadedModel, documentTextMap)

  return loadedModel
コード例 #11
0
def runExperiment(args, trainingData, testData):
  """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

  model = createModel(args)
  model = trainModel(args, model, trainingData)
  testModel(args, model, testData)

  # Test serialization - should give same result as above
  model.save(args.modelDir)
  newmodel = ClassificationModel.load(args.modelDir)
  print
  print "==========================Testing after de-serialization========"
  testModel(args, newmodel, testData)
コード例 #12
0
def runExperiment(args, trainingData, testData):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

    model = createModel(args)
    model = trainModel(args, model, trainingData)
    testModel(args, model, testData)

    # Test serialization - should give same result as above
    model.save(args.modelDir)
    newmodel = ClassificationModel.load(args.modelDir)
    print
    print "==========================Testing after de-serialization========"
    testModel(args, newmodel, testData)
コード例 #13
0
def setupExperiment(args):
    """
    Read the data, then create, train, save and reload a model.

    Side effect: sets args.numLabels to the number of label references before
    the model is instantiated.

    @param args Experiment arguments; args.verbosity is forwarded to
        trainModel() and args.modelDir is the save/load location.

    @return newModel (ClassificationModel) The restored NLP model.
    @return dataSet (list) Each item is a list representing a data sample, with
        the text string, list of label indices, and the sample ID.
    """
    # The last two returned items are not needed here.
    (samples, labelRefs, _, _) = readDataAndReshuffle(args)
    args.numLabels = len(labelRefs)

    # Create a model, train it, save it, reload it
    trainedModel = trainModel(instantiateModel(args), samples, labelRefs,
                              args.verbosity)
    trainedModel.save(args.modelDir)
    restoredModel = ClassificationModel.load(args.modelDir)

    return restoredModel, samples
コード例 #14
0
def setupExperiment(args):
  """
  Read the data, then create, train, save and reload a model.

  Side effect: sets args.numLabels to the number of label references before
  the model is instantiated.

  @param args Experiment arguments; args.verbosity is forwarded to
      trainModel() and args.modelDir is the save/load location.

  @return newModel (ClassificationModel) The restored NLP model.
  @return dataSet (list) Each item is a list representing a data sample, with
      the text string, list of label indices, and the sample ID.
  """
  # The last two returned items are not needed here.
  (samples, labelRefs, _, _) = readDataAndReshuffle(args)
  args.numLabels = len(labelRefs)

  # Create a model, train it, save it, reload it
  trainedModel = trainModel(instantiateModel(args), samples, labelRefs,
                            args.verbosity)
  trainedModel.save(args.modelDir)
  restoredModel = ClassificationModel.load(args.modelDir)

  return restoredModel, samples
コード例 #15
0
def runExperiment(args):
    """
    Create and train a model, save it, reload it, run the reloaded model over
    the training data, then dump profile information for the trained model.

    @param args Experiment arguments; passed to the data/model helpers, and
        args.modelDir is where the model is saved and reloaded from.

    @return The trained model (the instance that was saved, not the reloaded
        copy).
    """
    # Second argument of readDataAndReshuffle(); its meaning is defined by
    # that helper.
    indexList = [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14]

    # The fourth returned item is not needed in this experiment.
    trainingData, labelRefs, documentCategoryMap, _ = readDataAndReshuffle(
        args, indexList)

    # Create and train the model, then round-trip it through disk.
    trainedModel = trainModel(args, instantiateModel(args), trainingData,
                              labelRefs)
    trainedModel.save(args.modelDir)
    reloadedModel = ClassificationModel.load(args.modelDir)

    # The reloaded copy is evaluated on the same data it was trained on.
    testModel(args, reloadedModel, trainingData, labelRefs,
              documentCategoryMap)

    # Profile information comes from the in-memory trained model, not the
    # reloaded one.
    print
    trainedModel.dumpProfile()

    return trainedModel
コード例 #16
0
def runExperiment(args):
  """
  Create and train a model, save it, reload it, run the reloaded model over
  the training data, then dump profile information for the trained model.

  @param args Experiment arguments; passed to the data/model helpers, and
      args.modelDir is where the model is saved and reloaded from.

  @return The trained model (the instance that was saved, not the reloaded
      copy).
  """
  # Second argument of readDataAndReshuffle(); its meaning is defined by that
  # helper.
  indexList = [8,9,10,5,6,11,13,0,1,2,3,4,7,12,14]

  # The fourth returned item is not needed in this experiment.
  trainingData, labelRefs, documentCategoryMap, _ = readDataAndReshuffle(
      args, indexList)

  # Create and train the model, then round-trip it through disk.
  trainedModel = trainModel(args, instantiateModel(args), trainingData,
                            labelRefs)
  trainedModel.save(args.modelDir)
  reloadedModel = ClassificationModel.load(args.modelDir)

  # The reloaded copy is evaluated on the same data it was trained on.
  testModel(args, reloadedModel, trainingData, labelRefs, documentCategoryMap)

  # Profile information comes from the in-memory trained model, not the
  # reloaded one.
  print
  trainedModel.dumpProfile()

  return trainedModel