Example #1
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        self.oneClass = OneClassSVM(nu=self.nu, eps=self.eps)
        self.oneClass.train(data)
        self.data = data
        print('computing connected components')
        self.clusters = self.connectedComponents()
Example #2
    def train(self, data, **args):

        Classifier.train(self, data, **args)

        # this should be the last command in the train function;
        # if you redefine the "test" method, you can follow the code in
        # assess.test to save the testingTime
        self.log.trainingTime = self.getTrainingTime()
Example #3
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        self.numClasses = data.labels.numClasses
        if data.isWrapper:
            self.knnc = knn.KNN(self.k)
            self.knnc.train(data.castToBase())
        self.data = data

        self.log.trainingTime = self.getTrainingTime()
Example #4
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        if self.labels.numClasses != 2:
            raise ValueError('number of classes is not 2')

        if self.mode == 'cv':
            self.classifier.train(data, **args)

        numTries = 0
        maxNumTries = 5
        success = False
        while not success and numTries < maxNumTries:
            numTries += 1
            if self.mode == 'cv':
                fittingData = data
                r = self.classifier.stratifiedCV(data, self.numFolds)
            elif self.mode == 'holdOut':
                fittingData, trainingData = sample.splitDataset(
                    data, self.fittingFraction)
                self.classifier.train(trainingData, **args)
                r = self.classifier.test(fittingData)
            else:
                raise ValueError('unknown mode for Platt')
            self.labels = self.classifier.labels

            # fit the sigmoid on this round's results; retry on failure,
            # up to maxNumTries times:
            prior1 = fittingData.labels.classSize[1]
            prior0 = fittingData.labels.classSize[0]
            out = numpy.array(r.Y, numpy.float_)
            try:
                self.fit_A_B(prior1, prior0, out, r.decisionFunc, r.givenY)
                success = True
            except Exception:
                pass

        if not success:
            print('platt not successful')
            self.A = None
            self.B = None
            # fall back to the largest decision values observed on the data:
            results = self.classifier.test(data)
            maxPos = 1e-3
            minNeg = -1e-3
            for f in results.decisionFunc:
                if f > maxPos:
                    maxPos = f
                elif f < minNeg:
                    minNeg = f
            self.maxPos = maxPos
            self.minNeg = abs(minNeg)

        self.log.trainingTime = self.getTrainingTime()
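
The fitted A and B parametrize Platt's sigmoid, which maps a raw decision value to a probability estimate. A minimal sketch of that mapping, assuming the fit above succeeded (plattProbability is a hypothetical helper, not part of the class):

    import math

    def plattProbability(A, B, decisionValue):
        # Platt's sigmoid: P(y = 1 | f) = 1 / (1 + exp(A * f + B))
        return 1.0 / (1.0 + math.exp(A * decisionValue + B))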
Example #5
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        self.data = data

        if self.kernel is not None:
            self.data.attachKernel(self.kernel)
        if data.kernel.__class__.__name__ == 'Linear':
            self.train_linear(data)
        else:
            self.train_nonlinear(data)

        self.log.trainingTime = self.getTrainingTime()
Example #6
    def train(self, data, **args):
        """
        Train an SVM.
        """

        if data.__class__.__name__ in containersNotSupported:
            raise ValueError('convert your data into one of the C++ containers')

        Classifier.train(self, data, **args)
        if self.kernel is not None:
            data.attachKernel(self.kernel)

        # libsvm optimizer can only be used with vector data:
        if (not data.isVector) and self.optimizer == 'libsvm':
            self.optimizer = 'mysmo'

        isPrimal = False
        if 'alpha' in args:
            print('loading model')
            alpha = args['alpha']
            b = args['b']
            svID = args['svID']
        elif self.optimizer == 'libsvm':
            alpha, b, svID = self.trainLibsvm(data, **args)
        elif self.optimizer == 'liblinear':
            isPrimal = True
            w, b = self.trainLiblinear(data, **args)
        elif self.optimizer == 'gist':
            alpha, b, svID = self.trainGist(data, **args)
        elif self.optimizer == 'gradient':
            alpha, b, svID = self.trainGradient(data, **args)
        else:
            alpha, b, svID = self.trainMySMO(data, **args)

        if isPrimal:
            self.model = self.modelDispatcher(data, w=w, b=b)
        else:
            self.model = self.modelDispatcher(data,
                                              svID=svID,
                                              alpha=alpha,
                                              b=b)

        self.trained = True
        if not isPrimal:
            self.log.numSV = len(alpha)
        self.log.trainingTime = self.getTrainingTime()
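
Which branch runs above is decided entirely by the optimizer attribute set at construction time. A hypothetical usage sketch, assuming an SVM class with the keywords documented in Example #10 and a PyML dataset named trainingData:

    # hypothetical usage; trainingData is assumed to be a PyML dataset
    s = SVM(C=10, optimizer='libsvm')
    s.train(trainingData)              # dispatches to trainLibsvm
    lin = SVM(optimizer='liblinear')
    lin.train(trainingData)            # primal path: stores (w, b)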
Example #7
    def train(self, data, **args):
        """
        Train an SVM.
        """

        if data.__class__.__name__ in containersNotSupported:
            raise ValueError('convert your data into one of the C++ containers')

        Classifier.train(self, data, **args)
        if self.kernel is not None:
            data.attachKernel(self.kernel)

        # libsvm optimizer can only be used with vector data:
        if (not data.isVector) and self.optimizer == 'libsvm':
            self.optimizer = 'mysmo'

        isPrimal = False
        if 'alpha' in args:
            print('loading model')
            alpha = args['alpha']
            b = args['b']
            svID = args['svID']
        elif self.optimizer == 'libsvm':
            alpha, b, svID = self.trainLibsvm(data, **args)
        elif self.optimizer == 'liblinear':
            # this variant routes liblinear through the dual (alpha) model
            # rather than the primal (w, b) path:
            #isPrimal = True
            #w, b = self.trainLiblinear(data, **args)
            alpha, b, svID = self.trainLiblinear(data, **args)
        elif self.optimizer == 'gist':
            alpha, b, svID = self.trainGist(data, **args)
        elif self.optimizer == 'gradient':
            alpha, b, svID = self.trainGradient(data, **args)
        else:
            alpha, b, svID = self.trainMySMO(data, **args)

        if isPrimal:
            self.model = modelDispatcher(data, w=w, b=b)
        else:
            self.model = modelDispatcher(data, svID=svID, alpha=alpha, b=b)

        self.trained = True
        if not isPrimal:
            self.log.numSV = len(alpha)
        self.log.trainingTime = self.getTrainingTime()
Example #8
    def __init__(self, arg=None, **args):

        Classifier.__init__(self)
        self.trainingDirectory = None
        self.testingDirectory = None

        self.maxSize = 2e6
        self.numTrees = 200
        self.numFeatures = 0
        if arg.__class__ == self.__class__:
            other = arg
            self.numTrees = other.numTrees
            self.numFeatures = other.numFeatures

        if 'numTrees' in args:
            self.numTrees = args['numTrees']
        if 'numFeatures' in args:
            self.numFeatures = args['numFeatures']
Example #9
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        self.data = data
        if self.kernel is not None:
            self.data.attachKernel(self.kernel)

        # map the 0/1 labels to -1/+1:
        Y = numpy.array(data.labels.Y)
        Y = Y * 2 - 1

        K = numpy.zeros((len(data), len(data)), numpy.float_)
        print('getting kernel matrix')
        # fill the full symmetric kernel matrix, including the last
        # diagonal entry:
        for i in range(len(data)):
            for j in range(i, len(data)):
                K[i, j] = data.kernel.eval(data, i, j)
                K[j, i] = K[i, j]
        K = K + self.ridge * numpy.eye(len(data))
        print('about to call numpy.linalg.inv')
        self.alpha = numpy.dot(Y, numpy.linalg.inv(K))

        self.log.trainingTime = self.getTrainingTime()
        print('done training')
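
With the ridge term added, K is symmetric positive definite, so the same coefficients can be computed without forming an explicit inverse; a minimal equivalent sketch:

    # equivalent to numpy.dot(Y, numpy.linalg.inv(K)) for symmetric K,
    # but cheaper and numerically more stable:
    self.alpha = numpy.linalg.solve(K, Y)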
Example #10
    def __init__(self, arg=None, **args):

        """
        :Parameters:
          - `arg` - another SVM object or a kernel object; if no argument is given
            the kernel function of the training dataset is used
        
        :Keywords:
          - `C` - the svm C parameter
          - `Cmode` - the way the C parameter is used; values: 'equal', 'classProb',
            'fromData'.
            In 'equal' mode C is set to be the same for both classes
            In 'classProb' mode each class is assigned a C value that is 
            proportional to the size of the other class.  This results in 
            margin error costs being proportional to the ratio of the
            sizes of the two classes.  
            This is useful for datasets with an unbalanced class distribution.
            In 'fromData' the value of C for each pattern is taken from the
            'C' attribute of the training data.
          - `optimizer` - which optimizer to use.  The options are:
            'libsvm' -- run libsvm
            'liblinear' -- use liblinear (linear SVM only); in this case
            you can set the loss function to either 'l1' or 'l2'
            'mysmo' -- use the PyML native optimizer (based on libsvm)
            'gist' -- use a gist-like optimizer.
          - `loss` - when using liblinear set this to 'l1' or 'l2' (default: 'l1')
          - `cacheSize` - size of the kernel cache (in MB).
        """

        Classifier.__init__(self, arg, **args)

        self.kernel = None
        if arg.__class__ == self.__class__:
            if arg.kernel is not None:
                self.kernel = arg.kernel.__class__(arg.kernel)
        elif hasattr(arg, 'type') and arg.type == 'kernel':
            self.kernel = arg.__class__(arg)
        elif arg is not None:
            raise ValueError('unknown type of argument')
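
A hypothetical construction sketch following the keywords documented above (ker.Gaussian stands in for whatever kernel class is available; any kernel object should work):

    # hypothetical usage, following the docstring above
    s = SVM(ker.Gaussian(gamma=0.5), C=10, Cmode='classProb',
            optimizer='libsvm', cacheSize=256)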
Example #11
    def train(self, data, **args):

        Classifier.train(self, data, **args)
        self.featureID = data.featureID[:]

        if data.numFeatures * len(data) > 1.1 * self.maxSize:
            self.train2(data, **args)

        self.cleanup()
        # the location of the output from training
        # (hardcoded here; a temporary directory would be more portable):
        #self.trainingDirectory = tempfile.mkdtemp()
        self.trainingDirectory = '/Users/asa/temp'
        print('RF directory: ' + self.trainingDirectory)
        self.trainingExecutable = os.path.join(self.trainingDirectory, 'rf')
        self.trainingCode = os.path.join(self.trainingDirectory, 'rf.f')
        #os.mkdir(self.trainingDirectory)
        writeData(data)
        writeCode(self, self.trainingCode, self.trainingExecutable, data)
        # run the compiled random forest from inside its own directory,
        # then restore the original working directory:
        cwd = os.getcwd()
        os.chdir(self.trainingDirectory)
        os.system('./rf')
        os.chdir(cwd)
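
The commented-out tempfile call above is the portable alternative to the hardcoded path; a minimal sketch:

    import tempfile

    # create a scratch directory for the generated code and executable
    self.trainingDirectory = tempfile.mkdtemp(prefix='rf_')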
Example #12
    def __init__(self, arg=None, **args):

        Classifier.__init__(self, arg, **args)
        if self.regression:
            self.resultsObject = resultsObjects.RegressionResults
            self.classify = self.decisionFunc
Example #13
    def __init__(self, arg=None, **args):

        Classifier.__init__(self, arg, **args)