def __init__(self, arg, **args):
    """
    Construct a ModelSelector, either as a copy of another ModelSelector
    or from a Param object describing the grid of classifiers to try.

    :Parameters:
      - `arg` - another ModelSelector or a Param object

    :Keywords:
      - `measure` - which measure of accuracy to use for selecting the
        best classifier (default = 'balancedSuccessRate')
        supported measures are: 'balancedSuccessRate', 'successRate',
        'roc', 'roc50' (you can substitute any number instead of 50)
      - `numFolds` - number of CV folds to use when performing model selection
      - `foldsToPerform` - the number of folds to actually perform
    """
    Classifier.__init__(self, **args)
    if arg.__class__ == self.__class__:
        # copy construction: clone the other selector's Param grid and
        # carry over its selection settings
        self.param = arg.param.__class__(arg.param)
        self.measure = arg.measure
        self.numFolds = arg.numFolds
    elif arg.__class__.__name__.find('Param') >= 0:
        # duck-typed check: any class whose name contains 'Param' is
        # accepted and copied via its copy constructor
        self.param = arg.__class__(arg)
    else:
        raise ValueError, 'wrong type of input for ModelSelector'
    # the winning classifier is filled in by train()
    self.classifier = None
def __init__(self, arg, **args) : """ :Parameters: - `arg` - another ModelSelector or a Param object :Keywords: - `measure` - which measure of accuracy to use for selecting the best classifier (default = 'balancedSuccessRate') supported measures are: 'balancedSuccessRate', 'successRate', 'roc', 'roc50' (you can substitute any number instead of 50) - `numFolds` - number of CV folds to use when performing model selection - `foldsToPerform` - the number of folds to actually perform """ Classifier.__init__(self, **args) if arg.__class__ == self.__class__ : self.param = arg.param.__class__(arg.param) self.measure = arg.measure self.numFolds = arg.numFolds elif arg.__class__.__name__.find('Param') >= 0 : self.param = arg.__class__(arg) else : raise ValueError, 'wrong type of input for ModelSelector' self.classifier = None
def train(self, data, **args) :
    """
    Two-stage SVM model selection: first choose the Gaussian kernel
    width (gamma) with C fixed at self.Clow, then choose C with the
    selected gamma fixed.

    :Keywords:
      - `train` - boolean - whether to train the best classifier
        (default: True)
      - `vdata` - data to use for testing instead of using
        cross-validation (not implemented yet)
    """
    Classifier.train(self, data, **args)
    # stage 1: sweep gamma with C fixed at self.Clow
    kernel = ker.Gaussian()
    gammaSelect = ModelSelector(Param(svm.SVM(kernel, C = self.Clow), 'kernel.gamma', self.gamma), measure = self.measure, numFolds = self.numFolds)
    gammaSelect.train(data)
    # stage 2: fix the winning gamma and sweep C
    kernel = ker.Gaussian(gamma = gammaSelect.classifier.kernel.gamma)
    cSelect = ModelSelector(Param(svm.SVM(kernel), 'C', self.C), measure = self.measure, numFolds = self.numFolds)
    cSelect.train(data)
    # keep a private copy of the winner; by default retrain it on all data
    self.classifier = cSelect.classifier.__class__(cSelect.classifier)
    if 'train' not in args or args['train'] is True :
        self.classifier.train(data, **args)
    self.classifier.log.trainingTime = self.getTrainingTime()
    self.classifier.log.classifier = self.classifier.__class__(self.classifier)
def train(self, data, **args) :
    """
    Run stratified CV over the Param grid and keep the classifier with
    the best value of self.measure.

    :Keywords:
      - `train` - boolean - whether to train the best classifier
        (default: True)
    """
    Classifier.train(self, data, **args)
    # initialize below any attainable score so the first result always
    # wins; the previous initialization (0 with bestClassifier = None)
    # crashed the indexing below when every fold scored exactly 0
    maxSuccessRate = -1
    bestClassifier = 0
    classifierIdx = 0
    args['numFolds'] = self.numFolds
    args['foldsToPerform'] = self.foldsToPerform
    for r in self.param.stratifiedCV(data, **args) :
        successRate = getattr(r, self.measure)
        # strict '>' keeps the first of tied classifiers
        if successRate > maxSuccessRate :
            bestClassifier = classifierIdx
            maxSuccessRate = successRate
        classifierIdx += 1
    self.log.maxSuccessRate = maxSuccessRate
    # keep a private copy of the winning classifier
    self.classifier = self.param.classifiers[bestClassifier].__class__(
        self.param.classifiers[bestClassifier])
    if 'train' not in args or args['train'] is True :
        self.classifier.train(data, **args)
    self.classifier.log.trainingTime = self.getTrainingTime()
    self.classifier.log.classifier = str(self.classifier.__class__(self.classifier))
def train(self, data, **args):
    """
    Perform model selection by stratified cross-validation over the
    Param grid, retaining the classifier that maximizes self.measure.

    :Keywords:
      - `train` - boolean - whether to train the best classifier
        (default: True)
    """
    Classifier.train(self, data, **args)
    # start below every legal score: with the old (0, None) start, an
    # all-zero measure never updated bestClassifier and the indexing of
    # self.param.classifiers[None] below raised a TypeError
    maxSuccessRate = -1
    bestClassifier = 0
    classifierIdx = 0
    args['numFolds'] = self.numFolds
    args['foldsToPerform'] = self.foldsToPerform
    for r in self.param.stratifiedCV(data, **args):
        successRate = getattr(r, self.measure)
        if successRate > maxSuccessRate:
            bestClassifier = classifierIdx
            maxSuccessRate = successRate
        classifierIdx += 1
    self.log.maxSuccessRate = maxSuccessRate
    self.classifier = self.param.classifiers[bestClassifier].__class__(
        self.param.classifiers[bestClassifier])
    if 'train' not in args or args['train'] is True:
        self.classifier.train(data, **args)
    self.classifier.log.trainingTime = self.getTrainingTime()
    self.classifier.log.classifier = str(
        self.classifier.__class__(self.classifier))
def train(self, data, **args):
    """
    Train one copy of the base classifier per power-of-two feature
    count (1, 2, 4, ... below data.numFeatures), each on the top-ranked
    subset of features produced by the feature selector.
    """
    Classifier.train(self, data, **args)
    # feature-subset sizes: successive doublings strictly below the
    # full feature count
    counts = []
    size = 1
    while size < data.numFeatures:
        counts.append(size)
        size += size
    self.classifiers = [self.classifier.__class__(self.classifier)
                        for _ in counts]
    selector = self.featureSelector.__class__(self.featureSelector)
    ranked = selector.rank(data)
    for idx, count in enumerate(counts):
        # copy the data before trimming so the original stays intact
        subset = data.__class__(data)
        subset.keepFeatures(ranked[:count])
        self.classifiers[idx].train(subset)
        self.classifiers[idx].log.numFeatures = subset.numFeatures
    self.classifier.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    """
    Select an SVM's hyper-parameters in two passes: the Gaussian width
    gamma first (with C held at self.Clow), then C (with the chosen
    gamma held fixed).

    :Keywords:
      - `train` - boolean - whether to train the best classifier
        (default: True)
      - `vdata` - data to use for testing instead of using
        cross-validation (not implemented yet)
    """
    Classifier.train(self, data, **args)
    # pass 1: gamma search with C clamped to self.Clow
    gammaGrid = Param(svm.SVM(ker.Gaussian(), C=self.Clow),
                      'kernel.gamma', self.gamma)
    gammaSelect = ModelSelector(gammaGrid, measure=self.measure,
                                numFolds=self.numFolds)
    gammaSelect.train(data)
    # pass 2: C search with the winning gamma baked into the kernel
    bestGamma = gammaSelect.classifier.kernel.gamma
    cGrid = Param(svm.SVM(ker.Gaussian(gamma=bestGamma)), 'C', self.C)
    cSelect = ModelSelector(cGrid, measure=self.measure,
                            numFolds=self.numFolds)
    cSelect.train(data)
    self.classifier = cSelect.classifier.__class__(cSelect.classifier)
    if 'train' not in args or args['train'] is True:
        self.classifier.train(data, **args)
    self.classifier.log.trainingTime = self.getTrainingTime()
    self.classifier.log.classifier = self.classifier.__class__(
        self.classifier)
def train(self, data, **args) : Classifier.train(self, data, **args) # this should be the last command in the train function # if you redefine the "test" function you can follow the code in # assess.test to save the testingTime self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args) :
    """Train every component of the chain on the data, then train the
    final classifier, logging the total training time."""
    Classifier.train(self, data, **args)
    # NOTE(review): every component trains on the same `data` object;
    # presumably components transform it in place so later stages see
    # the processed data — confirm against the component implementations
    for component in self.chain :
        component.train(data, **args)
    self.classifier.train(data, **args)
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    """Train each chain stage in order on the data, then the terminal
    classifier, and record the training time."""
    Classifier.train(self, data, **args)
    for stage in self.chain:
        stage.train(data, **args)
    # the classifier itself is the last link of the pipeline
    self.classifier.train(data, **args)
    self.log.trainingTime = self.getTrainingTime()
def __init__ (self, arg) : Classifier.__init__(self) if arg.__class__ == self.__class__ : self.classifiers = [classifier.__class__(classifier) for classifier in arg.classifiers] elif type(arg) == type([]) : self.classifiers = [classifier.__class__(classifier) for classifier in arg]
def train(self, data, **args) : Classifier.train(self, data, **args) if not data.__class__.__name__ == 'DataAggregate' : raise ValueError, 'train requires a DataAggregate dataset' for i in range(len(self.classifiers)) : self.classifiers[i].train(data.datas[i], **args) self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    """Train each sub-classifier on the corresponding dataset of a
    DataAggregate container.

    :Parameters:
      - `data` - a DataAggregate whose `datas` list is assumed to match
        self.classifiers in length (not checked here)
    """
    Classifier.train(self, data, **args)
    if not data.__class__.__name__ == 'DataAggregate':
        raise ValueError, 'train requires a DataAggregate dataset'
    for i in range(len(self.classifiers)):
        self.classifiers[i].train(data.datas[i], **args)
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args) :
    """Run feature selection in place on `data`, record the surviving
    features, then train the wrapped classifier on the reduced data."""
    Classifier.train(self, data, **args)
    # the selector prunes `data` in place
    self.featureSelector.select(data, **args)
    #self.numFeatures = data.numFeatures
    # log the post-selection feature set on the classifier's log
    self.classifier.log.numFeatures = data.numFeatures
    self.classifier.log.features = data.featureID[:]
    self.classifier.train(data, **args)
    self.classifier.log.trainingTime = self.getTrainingTime()
def train(self, data, **args):
    """Apply the feature selector to the data (in place), note which
    features survived, and train the wrapped classifier on the result."""
    Classifier.train(self, data, **args)
    self.featureSelector.select(data, **args)
    # record the reduced feature set before training
    log = self.classifier.log
    log.numFeatures = data.numFeatures
    log.features = data.featureID[:]
    self.classifier.train(data, **args)
    log.trainingTime = self.getTrainingTime()
def __init__(self, classifier, **args) : Classifier.__init__(self, classifier, **args) if type(classifier) == type('') : return if (not hasattr(classifier, 'type')) or classifier.type != 'classifier' : raise ValueError, 'argument should be a classifier' if classifier.__class__ == self.__class__ : self.classifier = classifier.classifier.__class__( classifier.classifier) else : self.classifier = classifier.__class__(classifier)
def __init__(self, classifier, **args):
    """
    Wrap a classifier, keeping a private copy of it.

    :Parameters:
      - `classifier` - a classifier object (its `type` attribute must
        be 'classifier'); a plain-string argument is accepted and
        ignored — presumably used by the load machinery (TODO confirm)
    """
    Classifier.__init__(self, classifier, **args)
    if type(classifier) == type(''):
        # string argument: nothing further to copy
        return
    if (not hasattr(classifier, 'type')) or classifier.type != 'classifier':
        raise ValueError, 'argument should be a classifier'
    if classifier.__class__ == self.__class__:
        # copy construction: copy the wrapped classifier, not the wrapper
        self.classifier = classifier.classifier.__class__(
            classifier.classifier)
    else:
        self.classifier = classifier.__class__(classifier)
def __init__(self, arg): Classifier.__init__(self) if arg.__class__ == self.__class__: self.classifiers = [ classifier.__class__(classifier) for classifier in arg.classifiers ] elif type(arg) == type([]): self.classifiers = [ classifier.__class__(classifier) for classifier in arg ]
def load(self, fileName, data) :
    """load a trained classifier from a file.  Also provide the data on
    which the classifier was trained.  It assumes the underlying binary
    classifier is an SVM

    :Parameters:
      - `fileName` - file containing the per-class SVMs back to back
      - `data` - the data the classifier was trained on
    """
    from PyML import svm
    Classifier.train(self, data)
    numClasses = self.labels.numClasses
    self.classifiers = [self.classifier.__class__(self.classifier)
                        for i in range(numClasses)]
    # the handle must stay open across the loop (SVMs are read
    # sequentially); previously it was never closed — a resource leak
    file_handle = open(fileName)
    try :
        for i in range(numClasses) :
            datai = data.__class__(data, deepcopy = self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i] = svm.loadSVM(file_handle, datai)
    finally :
        file_handle.close()
def train(self, data, **args) :
    '''train k classifiers, one per class, each one-against-the-rest'''
    Classifier.train(self, data, **args)
    numClasses = self.labels.numClasses
    if numClasses <= 2:
        raise ValueError, 'Not a multi class problem'
    # one private copy of the base classifier per class
    self.classifiers = [self.classifier.__class__(self.classifier)
                        for i in range(numClasses)]
    for i in range(numClasses) :
        # make a copy of the data; this is done in case the classifier modifies the data
        datai = data.__class__(data, deepcopy = self.classifier.deepcopy)
        # relabel: class i against the rest
        datai = oneAgainstRest(datai, data.labels.classLabels[i])
        self.classifiers[i].train(datai)
    self.log.trainingTime = self.getTrainingTime()
def train(self, data, **args) :
    '''train k(k-1)/2 classifiers, one per unordered pair of classes'''
    Classifier.train(self, data, **args)
    numClasses = self.labels.numClasses
    if numClasses <= 2:
        raise ValueError, 'Not a multi class problem'
    # upper-triangular matrix of pairwise classifiers; entry [i][j]
    # (i < j) discriminates class i from class j
    self.classifiers = misc.matrix((numClasses, numClasses))
    for i in range(numClasses - 1) :
        for j in range(i+1, numClasses) :
            self.classifiers[i][j] = self.classifier.__class__(self.classifier)
            # restrict the data to the two classes of this pair
            dataij=data.__class__(data, deepcopy = self.classifier.deepcopy, classID = [i,j])
            self.classifiers[i][j].train(dataij)
    self.log.trainingTime = self.getTrainingTime()
def load(self, fileName, data):
    """load a trained classifier from a file.  Also provide the data on
    which the classifier was trained.  It assumes the underlying binary
    classifier is an SVM

    :Parameters:
      - `fileName` - file containing the per-class SVMs back to back
      - `data` - the data the classifier was trained on
    """
    from PyML import svm
    Classifier.train(self, data)
    numClasses = self.labels.numClasses
    self.classifiers = [
        self.classifier.__class__(self.classifier)
        for i in range(numClasses)
    ]
    # keep the handle open for the whole loop (SVMs are read back to
    # back) and close it afterwards — the original leaked it
    file_handle = open(fileName)
    try:
        for i in range(numClasses):
            datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i] = svm.loadSVM(file_handle, datai)
    finally:
        file_handle.close()
def train(self, data, **args): '''train k classifiers''' Classifier.train(self, data, **args) numClasses = self.labels.numClasses if numClasses <= 2: raise ValueError, 'Not a multi class problem' self.classifiers = [ self.classifier.__class__(self.classifier) for i in range(numClasses) ] for i in range(numClasses): # make a copy of the data; this is done in case the classifier modifies the data datai = data.__class__(data, deepcopy=self.classifier.deepcopy) datai = oneAgainstRest(datai, data.labels.classLabels[i]) self.classifiers[i].train(datai) self.log.trainingTime = self.getTrainingTime()
def __init__(self, arg1, arg2 = None) :
    """
    Copy another instance, or combine a classifier and a
    featureSelector (accepted in either order, identified by their
    `type` attribute).

    :Parameters:
      - `arg1`, `arg2` - either a single instance of this class (copy
        construction), or one classifier and one featureSelector
    """
    Classifier.__init__(self)
    if arg1.__class__ == self.__class__ :
        # copy construction: clone both wrapped objects
        other = arg1
        self.classifier = other.classifier.__class__(other.classifier)
        self.featureSelector = other.featureSelector.__class__(
            other.featureSelector)
    else :
        # dispatch each argument on its declared type
        for arg in (arg1, arg2) :
            if arg.type == 'classifier' :
                self.classifier = arg.__class__(arg)
            elif arg.type == 'featureSelector' :
                self.featureSelector = arg.__class__(arg)
            else :
                raise ValueError, \
                    'argument should be either classifier or featureSelector'
def __init__(self, arg = None, **args) :
    """
    :Parameters:
      - `arg` - another ModelSelector object

    :Keywords:
      - `C` - a list of values to try for C
      - `gamma` - a list of value to try for gamma
      - `measure` - which measure of accuracy to use for selecting the
        best classifier (default = 'balancedSuccessRate')
        supported measures are: 'balancedSuccessRate', 'successRate',
        'roc', 'roc50' (you can substitute another number instead of 50)
      - `numFolds` - number of CV folds to use when performing model selection
    """
    Classifier.__init__(self, arg, **args)
    # the selected classifier is filled in by train()
    self.classifier = None
def __init__(self, arg1, arg2=None): Classifier.__init__(self) if arg1.__class__ == self.__class__: other = arg1 self.classifier = other.classifier.__class__(other.classifier) self.featureSelector = other.featureSelector.__class__( other.featureSelector) else: for arg in (arg1, arg2): if arg.type == 'classifier': self.classifier = arg.__class__(arg) elif arg.type == 'featureSelector': self.featureSelector = arg.__class__(arg) else: raise ValueError, \ 'argument should be either classifier or featureSelector'
def __init__(self, arg=None, **args):
    """
    Construct an SVM model selector.

    :Parameters:
      - `arg` - another ModelSelector object

    :Keywords:
      - `C` - values of C to try
      - `gamma` - values of gamma to try
      - `measure` - accuracy measure used to pick the best classifier
        (default = 'balancedSuccessRate'); supported:
        'balancedSuccessRate', 'successRate', 'roc', 'roc50'
        (any number may replace the 50)
      - `numFolds` - number of CV folds used during model selection
    """
    Classifier.__init__(self, arg, **args)
    # populated by train() once selection has run
    self.classifier = None
def __init__(self, arg) :
    """
    :Parameters:
      - `arg` - a Chain object of a list of objects, each of which
        implements a 'train', 'test' and has a copy constructor
    """
    Classifier.__init__(self)
    if arg.__class__ == self.__class__ :
        # copy construction: clone the classifier and every chain component
        other = arg
        self.classifier = other.classifier.__class__(other.classifier)
        self.chain = [component.__class__(component)
                      for component in other.chain]
    elif type(arg) == type([]) :
        # list form: the last element is the classifier, the preceding
        # elements make up the processing chain
        self.classifier = arg[-1].__class__(arg[-1])
        self.chain = [arg[i].__class__(arg[i]) for i in range(len(arg) - 1)]
def train(self, data, **args): '''train k(k-1)/2 classifiers''' Classifier.train(self, data, **args) numClasses = self.labels.numClasses if numClasses <= 2: raise ValueError, 'Not a multi class problem' self.classifiers = misc.matrix((numClasses, numClasses)) for i in range(numClasses - 1): for j in range(i + 1, numClasses): self.classifiers[i][j] = self.classifier.__class__( self.classifier) dataij = data.__class__(data, deepcopy=self.classifier.deepcopy, classID=[i, j]) self.classifiers[i][j].train(dataij) self.log.trainingTime = self.getTrainingTime()
def __init__(self, arg):
    """
    Build a processing chain.

    :Parameters:
      - `arg` - a Chain object of a list of objects, each of which
        implements a 'train', 'test' and has a copy constructor; in the
        list form the final element is the classifier and the rest form
        the preprocessing chain
    """
    Classifier.__init__(self)
    if arg.__class__ == self.__class__:
        source = arg
        self.classifier = source.classifier.__class__(source.classifier)
        self.chain = [link.__class__(link) for link in source.chain]
    elif type(arg) == type([]):
        self.classifier = arg[-1].__class__(arg[-1])
        self.chain = [link.__class__(link) for link in arg[:-1]]
def __init__(self, arg = None, **args) :
    """Forwarding constructor: `arg` is accepted for copy-constructor
    signature compatibility but is not used here; keyword arguments are
    passed on to the base class."""
    Classifier.__init__(self, **args)