def train(self, data, **args):
    Classifier.train(self, data, **args)
    self.oneClass = OneClassSVM(nu=self.nu, eps=self.eps)
    self.oneClass.train(data)
    self.data = data
    print 'computing connected components'
    self.clusters = self.connectedComponents()
def train(self, data, **args) :
    Classifier.train(self, data, **args)
    # this should be the last command in the train function
    # if you redefine the "test" function you can follow the code in
    # assess.test to save the testingTime
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args) :
    Classifier.train(self, data, **args)
    self.numClasses = data.labels.numClasses
    if data.isWrapper :
        self.knnc = knn.KNN(self.k)
        self.knnc.train(data.castToBase())
    self.data = data
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    Classifier.train(self, data, **args)
    if self.labels.numClasses != 2:
        raise ValueError, 'number of classes is not 2'
    if self.mode == 'cv':
        self.classifier.train(data, **args)
    numTries = 0
    maxNumTries = 5
    success = False
    while not success and numTries < maxNumTries:
        numTries += 1
        if self.mode == 'cv':
            fittingData = data
            r = self.classifier.stratifiedCV(data, self.numFolds)
        elif self.mode == 'holdOut':
            fittingData, trainingData = sample.splitDataset(data, self.fittingFraction)
            self.classifier.train(trainingData, **args)
            r = self.classifier.test(fittingData)
        else:
            raise ValueError, 'unknown mode for Platt'
        self.labels = self.classifier.labels
        prior1 = fittingData.labels.classSize[1]
        prior0 = fittingData.labels.classSize[0]
        out = numpy.array(r.Y, numpy.float_)
        try:
            self.fit_A_B(prior1, prior0, out, r.decisionFunc, r.givenY)
            success = True
        except:
            pass
    if not success:
        print 'platt not successful'
        self.A = None
        self.B = None
        results = self.classifier.test(data)
        maxPos = 1e-3
        minNeg = -1e-3
        for f in results.decisionFunc:
            if f > 0:
                if f > maxPos:
                    maxPos = f
            elif f < 0:
                if f < minNeg:
                    minNeg = f
        self.maxPos = maxPos
        self.minNeg = abs(minNeg)
    self.log.trainingTime = self.getTrainingTime()
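For reference, the A and B found by fit_A_B parameterize Platt's sigmoid, which maps a raw SVM decision value f to a class probability; maxPos and minNeg are presumably kept so that raw outputs can still be rescaled when the sigmoid fit fails. A minimal sketch of the standard sigmoid (the sign convention follows Platt's paper and is an assumption about how fit_A_B stores A and B):

import math

def platt_probability(f, A, B):
    # Platt (1999): P(y = 1 | f) = 1 / (1 + exp(A*f + B))
    return 1.0 / (1.0 + math.exp(A * f + B))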
def train(self, data, **args):
    Classifier.train(self, data, **args)
    self.data = data
    if self.kernel is not None:
        self.data.attachKernel(self.kernel)
    if data.kernel.__class__.__name__ == 'Linear':
        self.train_linear(data)
    else:
        self.train_nonlinear(data)
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    """ train an SVM """
    if data.__class__.__name__ in containersNotSupported:
        raise ValueError, 'convert your data into one of the C++ containers'
    Classifier.train(self, data, **args)
    if self.kernel is not None:
        data.attachKernel(self.kernel)
    # libsvm optimizer can only be used with vector data:
    if (not data.isVector) and self.optimizer == 'libsvm':
        self.optimizer = 'mysmo'
    isPrimal = False
    if 'alpha' in args:
        print 'loading model'
        alpha = args['alpha']
        b = args['b']
        svID = args['svID']
    elif self.optimizer == 'libsvm':
        alpha, b, svID = self.trainLibsvm(data, **args)
    elif self.optimizer == 'liblinear':
        isPrimal = True
        w, b = self.trainLiblinear(data, **args)
    elif self.optimizer == 'gist':
        alpha, b, svID = self.trainGist(data, **args)
    elif self.optimizer == 'gradient':
        alpha, b, svID = self.trainGradient(data, **args)
    else:
        alpha, b, svID = self.trainMySMO(data, **args)
    if isPrimal:
        self.model = self.modelDispatcher(data, w=w, b=b)
    else:
        self.model = self.modelDispatcher(data, svID=svID, alpha=alpha, b=b)
    self.trained = True
    if not isPrimal:
        self.log.numSV = len(alpha)
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args) :
    """ train an SVM """
    if data.__class__.__name__ in containersNotSupported :
        raise ValueError, 'convert your data into one of the C++ containers'
    Classifier.train(self, data, **args)
    if self.kernel is not None :
        data.attachKernel(self.kernel)
    # libsvm optimizer can only be used with vector data:
    if (not data.isVector) and self.optimizer == 'libsvm' :
        self.optimizer = 'mysmo'
    isPrimal = False
    if 'alpha' in args :
        print 'loading model'
        alpha = args['alpha']
        b = args['b']
        svID = args['svID']
    elif self.optimizer == 'libsvm' :
        alpha, b, svID = self.trainLibsvm(data, **args)
    elif self.optimizer == 'liblinear' :
        #isPrimal = True
        #w, b = self.trainLiblinear(data, **args)
        alpha, b, svID = self.trainLiblinear(data, **args)
    elif self.optimizer == 'gist' :
        alpha, b, svID = self.trainGist(data, **args)
    elif self.optimizer == 'gradient' :
        alpha, b, svID = self.trainGradient(data, **args)
    else :
        alpha, b, svID = self.trainMySMO(data, **args)
    if isPrimal :
        self.model = modelDispatcher(data, w=w, b=b)
    else :
        self.model = modelDispatcher(data, svID=svID, alpha=alpha, b=b)
    self.trained = True
    if not isPrimal :
        self.log.numSV = len(alpha)
    self.log.trainingTime = self.getTrainingTime()
def __init__(self, arg=None, **args):
    Classifier.__init__(self)
    self.trainingDirectory = None
    self.testingDirectory = None
    self.maxSize = 2e6
    self.numTrees = 200
    self.numFeatures = 0
    if arg.__class__ == self.__class__:
        other = arg
        self.numTrees = other.numTrees
        self.numFeatures = other.numFeatures
    if 'numTrees' in args:
        self.numTrees = args['numTrees']
    if 'numFeatures' in args:
        self.numFeatures = args['numFeatures']
def train(self, data, **args) :
    Classifier.train(self, data, **args)
    self.data = data
    if self.kernel is not None :
        self.data.attachKernel(self.kernel)
    # map the 0/1 labels to -1/+1
    Y = numpy.array(data.labels.Y)
    Y = Y * 2 - 1
    K = numpy.zeros((len(data), len(data)), numpy.float_)
    print 'getting kernel matrix'
    # fill the symmetric kernel matrix, including the last diagonal entry
    for i in range(len(data)) :
        for j in range(i, len(data)) :
            K[i][j] = data.kernel.eval(data, i, j)
            K[j][i] = K[i][j]
    K = K + self.ridge * numpy.eye(len(data))
    print 'about to call numpy.linalg.inv'
    self.alpha = numpy.dot(Y, numpy.linalg.inv(K))
    self.log.trainingTime = self.getTrainingTime()
    print 'done training'
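The training step above computes the ridge-regularized coefficients alpha satisfying (K + ridge * I) alpha = Y. Because the regularized kernel matrix is symmetric, multiplying Y by the explicit inverse gives the same vector as solving the linear system directly, and numpy.linalg.solve is the cheaper and numerically safer route. A small sketch of that equivalent computation (train_alpha is a hypothetical standalone helper, not part of the class):

import numpy

def train_alpha(K, Y, ridge):
    # solve (K + ridge * I) alpha = Y without forming the inverse;
    # for symmetric K this matches numpy.dot(Y, numpy.linalg.inv(K + ridge * numpy.eye(len(Y))))
    n = len(Y)
    return numpy.linalg.solve(K + ridge * numpy.eye(n), numpy.asarray(Y, dtype=float))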
def __init__(self, arg = None, **args):
    """
    :Parameters:
      - `arg` - another SVM object or a kernel object; if no argument is
        given the kernel function of the training dataset is used

    :Keywords:
      - `C` - the SVM C parameter
      - `Cmode` - the way the C parameter is used; values: 'equal',
        'classProb', 'fromData'.
        In 'equal' mode C is set to be the same for both classes.
        In 'classProb' mode each class is assigned a C value that is
        proportional to the size of the other class.  This results in
        margin error costs being proportional to the ratio of the sizes
        of the two classes, which is useful for datasets with an
        unbalanced class distribution.
        In 'fromData' the value of C for each pattern is taken from the
        'C' attribute of the training data.
      - `optimizer` - which optimizer to use.  The options are:
        'libsvm' - run libsvm
        'liblinear' - use liblinear (linear SVM only); in this case you
        have the option to set the loss function - either 'l1' or 'l2'
        'mysmo' - use the PyML native optimizer (based on libsvm)
        'gist' - use a gist-like optimizer
      - `loss` - when using liblinear set this to 'l1' or 'l2'
        (default: 'l1')
      - `cacheSize` - size of the kernel cache (in MB).
    """
    Classifier.__init__(self, arg, **args)
    self.kernel = None
    if arg.__class__ == self.__class__ :
        if arg.kernel is not None :
            self.kernel = arg.kernel.__class__(arg.kernel)
    elif hasattr(arg, 'type') and arg.type == 'kernel' :
        self.kernel = arg.__class__(arg)
    elif arg is not None :
        raise ValueError, 'unknown type of argument'
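A short usage sketch for the constructor parameters documented above, following typical PyML tutorial usage; the data file name is hypothetical and the import line assumes PyML's top-level exports:

from PyML import SparseDataSet, SVM

data = SparseDataSet('heart.data')                          # hypothetical sparse-format file
s = SVM(C = 10, Cmode = 'classProb', optimizer = 'libsvm')
s.train(data)
results = s.cv(data, 5)                                     # 5-fold cross-validation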
def train(self, data, **args):
    Classifier.train(self, data, **args)
    self.featureID = data.featureID[:]
    if data.numFeatures * len(data) > 1.1 * self.maxSize:
        self.train2(data, **args)
    self.cleanup()
    # the location of the output from training:
    #self.trainingDirectory = tempfile.mkdtemp()
    self.trainingDirectory = '/Users/asa/temp'
    print 'RF directory:', self.trainingDirectory
    self.trainingExecutable = os.path.join(self.trainingDirectory, 'rf')
    self.trainingCode = os.path.join(self.trainingDirectory, 'rf.f')
    #os.mkdir(self.trainingDirectory)
    writeData(data)
    writeCode(self, self.trainingCode, self.trainingExecutable, data)
    dir = os.getcwd()
    os.chdir(self.trainingDirectory)
    os.system('./rf')
    os.chdir(dir)
def __init__(self, arg=None, **args):
    Classifier.__init__(self, arg, **args)
    if self.regression:
        self.resultsObject = resultsObjects.RegressionResults
        self.classify = self.decisionFunc
def __init__(self, arg = None, **args) :
    Classifier.__init__(self, arg, **args)