def createModel(self, modelName, loadPath, savePath,
                *modelFactoryArgs, **modelFactoryKwargs):
  """ Creates a new model and trains it, or loads a previously trained model
  from specified loadPath.

  @param modelName (str)  Identifier defined in the model factory mapping.
  @param loadPath  (str)  If truthy, path of an existing serialized model that
                          must load successfully; there is no fallback to
                          creating a new model in that case.
  @param savePath  (str)  Location used when creating and training a model.
  @return (ClassificationModel) The loaded or newly trained model.
  @raises ImbuUnableToLoadModelError when loadPath is given but loading fails.
  """
  # Validate the model name up front: getattr raises AttributeError if the
  # mapped identifier is not defined on ClassificationModelTypes.
  modelType = getattr(ClassificationModelTypes, self._mapModelName(modelName))

  if loadPath:
    # User has explicitly specified a load path and expects a model to exist.
    # Model not found means the user may have given an incorrect path; DO NOT
    # attempt to create a new model — raise instead.
    try:
      return ClassificationModel.load(loadPath)
    except IOError as exc:
      raise ImbuUnableToLoadModelError(exc)

  # No load path specified: gracefully create and train a new model.
  # (The original code first attempted ClassificationModel.load(loadPath)
  # here, but loadPath is always falsy on this branch, so that load could
  # never succeed and only risked raising a non-IOError.)
  model = self._modelFactory(modelName, savePath,
                             *modelFactoryArgs, **modelFactoryKwargs)
  self.train(model, savePath)
  return model
def createModel(self, modelName, loadPath, savePath,
                *modelFactoryArgs, **modelFactoryKwargs):
  """ Creates a new model and trains it, or loads a previously trained model
  from specified loadPath.

  @param modelName (str)  Identifier defined in the model factory mapping.
  @param loadPath  (str)  If truthy, path of an existing serialized model;
                          loading failures are NOT recovered from.
  @param savePath  (str)  Location used when creating and training a model.
  @return (ClassificationModel) The loaded or newly trained model.
  @raises ImbuUnableToLoadModelError when loadPath is given but loading fails.
  """
  # The model name must be an identifier defined in the model factory
  # mapping; getattr raises AttributeError otherwise, validating the name.
  modelType = getattr(ClassificationModelTypes, self._mapModelName(modelName))

  if loadPath:
    # User has explicitly specified a load path and expects a model to
    # exist; do NOT fall back to creating a new model on failure.
    try:
      return ClassificationModel.load(loadPath)
    except IOError as exc:
      raise ImbuUnableToLoadModelError(exc)

  # User has not specified a load path: gracefully create a new model.
  # (Bug fix: the original attempted ClassificationModel.load(loadPath)
  # with a falsy loadPath here, which could never succeed.)
  model = self._modelFactory(modelName, savePath,
                             *modelFactoryArgs, **modelFactoryKwargs)
  self.train(model, savePath)
  return model
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ (dataSet, labelRefs, documentCategoryMap, documentTextMap) = readDataAndReshuffle(args) # Train only with documents whose id's are divisible by 100 trainingData = [x for i, x in enumerate(dataSet) if x[2] % 100 == 0] testData = [x for i, x in enumerate(dataSet) if x[2] % 100 != 0] print "Num training", len(trainingData), "num testing", len(testData) # Create model model = instantiateModel(args) model = trainModel(args, model, trainingData, labelRefs) model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap) testModel(args, newmodel, testData, labelRefs, documentCategoryMap) return model
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ # Create model model = instantiateModel(args) # Train model on the first 80% of the dataset trainingSplit = int(len(_DATASET) * 0.80) model = trainModel(model, _DATASET[:trainingSplit]) # Test model on the full dataset accuracyPct = testModel(model, _DATASET) # Validate serialization - testing after reloading should give same result model.save(args.modelDir) newModel = ClassificationModel.load(args.modelDir) print print "Testing serialization..." newAccuracyPct = testModel(newModel, _DATASET) if accuracyPct == newAccuracyPct: print "Serialization validated." else: print ( "Inconsistent results before ({}) and after ({}) saving/loading " "the model!".format(accuracyPct, newAccuracyPct) )
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ (dataSet, labelRefs, documentCategoryMap, documentTextMap) = readDataAndReshuffle(args) # Train only with documents whose id's are divisible by 100 trainingData = [x for i,x in enumerate(dataSet) if x[2]%100==0] testData = [x for i,x in enumerate(dataSet) if x[2]%100!=0] print "Num training",len(trainingData),"num testing",len(testData) # Create model model = instantiateModel(args) model = trainModel(args, model, trainingData, labelRefs) model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap) testModel(args, newmodel, testData, labelRefs, documentCategoryMap) return model
def _executeModelLifecycle(self, modelName, modelDir):
  """ Create a model, train it, save it, reload it, return it."""
  trainedModel = trainModel(createModel(modelName, **self.modelParams),
                            self.dataSet)
  trainedModel.save(modelDir)
  # Drop the in-memory copy so the returned model genuinely comes from disk.
  del trainedModel
  return ClassificationModel.load(modelDir)
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ # Create model model = instantiateModel(args) # Train model on the first 80% of the dataset trainingSplit = int(len(_DATASET) * 0.80) model = trainModel(model, _DATASET[:trainingSplit]) # Test model on the full dataset accuracyPct = testModel(model, _DATASET) # Validate serialization - testing after reloading should give same result model.save(args.modelDir) newModel = ClassificationModel.load(args.modelDir) print print "Testing serialization..." newAccuracyPct = testModel(newModel, _DATASET) if accuracyPct == newAccuracyPct: print "Serialization validated." else: print( "Inconsistent results before ({}) and after ({}) saving/loading " "the model!".format(accuracyPct, newAccuracyPct))
def executeModelLifecycle(args, trainingData, labelRefs):
  """ Execute model lifecycle: create a model, train it, save it, reload it.

  @param args (argparse) Arguments used in classification model API
      experiments.
  @param trainingData (dict) Keys are document numbers, values are
      three-tuples of the document (str), labels (list), and document ID
      (int).
  @param labelRefs (list) Label names (str) corresponding to label indices.
  @return (two-tuple) Original and new models.
  """
  originalModel = instantiateModel(args)
  originalModel = trainModel(originalModel, trainingData, labelRefs,
                             args.verbosity)
  originalModel.save(args.modelDir)
  restoredModel = ClassificationModel.load(args.modelDir)
  return originalModel, restoredModel
def executeModelLifecycle(args, trainingData, labelRefs):
  """ Execute model lifecycle: create a model, train it, save it, reload it.

  @param args (argparse) Arguments used in classification model API
      experiments.
  @param trainingData (dict) Keys are document numbers, values are
      three-tuples of the document (str), labels (list), and document ID
      (int).
  @param labelRefs (list) Label names (str) corresponding to label indices.
  @return (two-tuple) Original and new models.
  """
  # Train a freshly instantiated model and checkpoint it.
  trained = trainModel(instantiateModel(args), trainingData, labelRefs,
                       args.verbosity)
  trained.save(args.modelDir)
  # Hand back both the in-memory model and its reloaded twin.
  return trained, ClassificationModel.load(args.modelDir)
def runExperiment(args):
  """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """
  # Only documentTextMap is used below; the other unpacked values are
  # byproducts of readDataAndReshuffle's fixed return shape.
  categoryOrder = [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14]
  (trainingData, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args, categoryOrder)

  # Restore a previously trained model and analyze it against the documents.
  model = ClassificationModel.load(args.modelDir)
  analyzeModel(args, model, documentTextMap)
  return model
def runExperiment(args, trainingData, testData): """ Create model according to args, train on training data, save model, restore model, test on test data. """ model = createModel(args) model = trainModel(args, model, trainingData) testModel(args, model, testData) # Test serialization - should give same result as above model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) print print "==========================Testing after de-serialization========" testModel(args, newmodel, testData)
def runExperiment(args, trainingData, testData): """ Create model according to args, train on training data, save model, restore model, test on test data. """ model = createModel(args) model = trainModel(args, model, trainingData) testModel(args, model, testData) # Test serialization - should give same result as above model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) print print "==========================Testing after de-serialization========" testModel(args, newmodel, testData)
def setupExperiment(args):
  """
  Create model according to args, train on training data, save model,
  restore model.
  @return newModel (ClassificationModel) The restored NLP model.
  @return dataSet (list) Each item is a list representing a data sample,
      with the text string, list of label indices, and the sample ID.
  """
  dataSet, labelRefs, _, _ = readDataAndReshuffle(args)
  args.numLabels = len(labelRefs)

  # Train a fresh model, checkpoint it, then hand back the reloaded copy.
  trainedModel = trainModel(instantiateModel(args), dataSet, labelRefs,
                            args.verbosity)
  trainedModel.save(args.modelDir)
  return ClassificationModel.load(args.modelDir), dataSet
def setupExperiment(args):
  """
  Create model according to args, train on training data, save model,
  restore model.
  @return newModel (ClassificationModel) The restored NLP model.
  @return dataSet (list) Each item is a list representing a data sample,
      with the text string, list of label indices, and the sample ID.
  """
  dataSet, labelRefs, _, _ = readDataAndReshuffle(args)
  args.numLabels = len(labelRefs)

  # Model lifecycle: create, train, save, then reload from disk.
  model = instantiateModel(args)
  model = trainModel(model, dataSet, labelRefs, args.verbosity)
  model.save(args.modelDir)
  restoredModel = ClassificationModel.load(args.modelDir)

  return restoredModel, dataSet
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ (trainingData, labelRefs, documentCategoryMap, documentTextMap) = readDataAndReshuffle( args, [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14]) # Create model model = instantiateModel(args) model = trainModel(args, model, trainingData, labelRefs) model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap) # Print profile information print model.dumpProfile() return model
def runExperiment(args): """ Create model according to args, train on training data, save model, restore model, test on test data. """ (trainingData, labelRefs, documentCategoryMap, documentTextMap) = readDataAndReshuffle(args, [8,9,10,5,6,11,13,0,1,2,3,4,7,12,14]) # Create model model = instantiateModel(args) model = trainModel(args, model, trainingData, labelRefs) model.save(args.modelDir) newmodel = ClassificationModel.load(args.modelDir) testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap) # Print profile information print model.dumpProfile() return model