Example #1
    def trainNormalization(self):

        assert len(self.labels) >= 2

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            # Learn the classes
            n_classes = 0
            self.class_map = {}

            for label in self.labels:
                if label not in self.class_map:
                    self.class_map[label] = n_classes
                    n_classes += 1

            if self.type == TYPE_MULTICLASS:
                assert n_classes >= 2
            if self.type == TYPE_TWOCLASS:
                assert n_classes == 2

            self.class_inv = {}
            for key, value in self.class_map.items():
                self.class_inv[value] = key

            new_labels = []
            for each in self.labels:
                new_labels.append(self.class_map[each])
            self.labels = new_labels

        if self.type == TYPE_REGRESSION:
            self.reg_mean = mean(self.labels)
            self.reg_std = std(self.labels)

            new_labels = []
            for each in self.labels:
                new_labels.append((each - self.reg_mean) / self.reg_std)
            self.labels = new_labels

        #test length
        shape = self.vectors[0].shape
        assert len(shape) == 1

        for each in self.vectors:
            assert shape == each.shape

        # create a data matrix
        data = array(self.vectors, 'd')
        if self.norm == NORM_AUTO:
            self.norm = NORM_VALUE
            if data.shape[1] > 128:
                self.norm = NORM_PCA

        #Setup value normalization
        if self.norm == NORM_VALUE:
            self.dmean = data.mean(axis=0)
            self.dstd = data.std(axis=0)
            self.vectors = (data - self.dmean) / self.dstd

        elif self.norm == NORM_PCA:
            self.pca = PCA()
            for vec in self.vectors:
                self.pca.addFeature(vec)

            if self.pca_basis > 1:
                self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
            else:
                self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

            new_vectors = []
            for each in self.vectors:
                new_vectors.append(self.pca.project(each, whiten=True))
            self.vectors = array(new_vectors, 'd')
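
Before any training happens, the trainNormalization step above does two things: it maps arbitrary class labels onto integer class ids (keeping the inverse map for turning predictions back into labels), and it normalizes the feature matrix, either with a per-feature z-score (NORM_VALUE) or a whitened PCA projection (NORM_PCA, chosen automatically when vectors exceed 128 dimensions). Below is a minimal standalone NumPy sketch of the label-mapping and z-score steps; the data and variable names are illustrative, not part of PyVision.

import numpy as np

# Hypothetical labels and feature vectors, standing in for addTraining() input.
labels = ['cat', 'dog', 'cat', 'dog']
vectors = [np.array([1.0, 2.0, 3.0]),
           np.array([2.0, 0.0, 3.5]),
           np.array([0.5, 1.0, 2.5]),
           np.array([1.5, 3.0, 4.0])]

# Map each distinct label to an integer class id, as class_map does, and
# keep the inverse map used later to turn predictions back into labels.
class_map = {}
for label in labels:
    if label not in class_map:
        class_map[label] = len(class_map)
class_inv = {v: k for k, v in class_map.items()}
int_labels = [class_map[l] for l in labels]

# Per-feature z-score normalization, as the NORM_VALUE branch does.
data = np.array(vectors, 'd')
dmean = data.mean(axis=0)
dstd = data.std(axis=0)
normalized = (data - dmean) / dstd

print(int_labels)                # [0, 1, 0, 1]
print(class_inv[0])              # 'cat'
print(normalized.mean(axis=0))   # approximately zero for every column
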
Example #2
# Note: this excerpt relies on the surrounding PyVision module for its
# imports and constants (numpy's array/mean/std, the pv image wrapper, the
# PCA helper, and the TYPE_* / NORM_* flags); they are not repeated here.
class VectorClassifier:

    ##
    # Configure some defaults for the classifier value normalization.
    #
    # <p>This configures some defaults for the classifier, such as the
    # type of classifier and how values are normalized.
    def __init__(self,
                 classifer_type,
                 normalization=NORM_AUTO,
                 reg_norm=REG_NORM_VALUE,
                 pca_basis=0.95,
                 pca_drop=0):

        # Setup basic configuration
        self.type = classifer_type
        self.norm = normalization
        self.reg_norm = reg_norm
        self.pca_basis = pca_basis
        self.pca_drop = pca_drop

        self.labels = []
        self.vectors = []
        self.vector_length = None

        self.reg_mean = 0.0
        self.reg_std = 1.0

    ##
    # Learn the range of values that are expected for labels and data.
    # Then setup for normalization.
    def trainNormalization(self):

        assert len(self.labels) >= 2

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            # Learn the classes
            n_classes = 0
            self.class_map = {}

            for label in self.labels:
                if label not in self.class_map:
                    self.class_map[label] = n_classes
                    n_classes += 1

            if self.type == TYPE_MULTICLASS:
                assert n_classes >= 2
            if self.type == TYPE_TWOCLASS:
                assert n_classes == 2

            self.class_inv = {}
            for key, value in self.class_map.items():
                self.class_inv[value] = key

            new_labels = []
            for each in self.labels:
                new_labels.append(self.class_map[each])
            self.labels = new_labels

        if self.type == TYPE_REGRESSION:
            self.reg_mean = mean(self.labels)
            self.reg_std = std(self.labels)

            new_labels = []
            for each in self.labels:
                new_labels.append((each - self.reg_mean) / self.reg_std)
            self.labels = new_labels

        #test length
        shape = self.vectors[0].shape
        assert len(shape) == 1

        for each in self.vectors:
            assert shape == each.shape

        # create a data matrix
        data = array(self.vectors, 'd')
        if self.norm == NORM_AUTO:
            self.norm = NORM_VALUE
            if data.shape[1] > 128:
                self.norm = NORM_PCA

        #Setup value normalization
        if self.norm == NORM_VALUE:
            self.dmean = data.mean(axis=0)
            self.dstd = data.std(axis=0)
            self.vectors = (data - self.dmean) / self.dstd

        elif self.norm == NORM_PCA:
            self.pca = PCA()
            for vec in self.vectors:
                self.pca.addFeature(vec)

            if self.pca_basis > 1:
                self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
            else:
                self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

            new_vectors = []
            for each in self.vectors:
                new_vectors.append(self.pca.project(each, whiten=True))
            self.vectors = array(new_vectors, 'd')

    ##
    # Normalize a data vector using the statistics learned in
    # trainNormalization: a z-score for NORM_VALUE, a whitened PCA
    # projection for NORM_PCA, or a pass-through for NORM_NONE.
    def normalizeVector(self, data):
        if self.norm == NORM_NONE:
            return data
        elif self.norm == NORM_VALUE:
            return (data - self.dmean) / self.dstd
        elif self.norm == NORM_PCA:
            return self.pca.project(data, whiten=True)
        else:
            raise NotImplementedError(
                "Could not determine normalization type: %s" % self.norm)

    ##
    # Add a training sample.  Data must be a vector of numbers.
    def addTraining(self, label, data, ilog=None):
        if self.type == TYPE_REGRESSION:
            self.labels.append(float(label))
        else:
            self.labels.append(label)

        if isinstance(data, pv.Image):
            data = data.asMatrix2D().flatten()
        data = array(data, 'd').flatten()

        self.vectors.append(data)

    ##
    # Predict the class or the value for the input data.
    #
    # <p>This function will perform value normalization and then
    # delegate to the subclass to perform classification or
    # regression.
    def predict(self, data, ilog=None):
        if isinstance(data, pv.Image):
            data = data.asMatrix2D().flatten()
        data = array(data, 'd').flatten()

        data = self.normalizeVector(data)

        value = self.predictValue(data, ilog=ilog)

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            return self.invertClass(value)
        if self.type == TYPE_REGRESSION:
            return self.invertReg(value)

    ##
    # Override this method in subclasses.
    # Input should be a numpy array of doubles.
    #
    # For a classifier the output should be an int class id; for a
    # regression the output should be a float.
    def predictValue(self, data, ilog=None):
        raise NotImplementedError("This is an abstract method")

    ##
    # Train the classifier on the training data.
    #
    # This normalizes the data and the labels, and then passes the
    # results to the subclass for training.
    def train(self, ilog=None, **kwargs):
        self.trainNormalization()

        self.trainClassifer(self.labels, self.vectors, ilog=ilog, **kwargs)

        # remove training data
        del self.labels
        del self.vectors

    ##
    # This abstract method should be overridden by subclasses.
    #
    # <p> This method is called from {@link train}.  The vectors and values
    # passed to this method will have been normalized.  This method should
    # train a classifier or regression algorithm on that normalized data.
    #
    # <p> Any keyword arguments passed to train will also be passed on to
    # this method.  This allows for variations in training or for verbose
    # output.
    def trainClassifer(self, labels, vectors, ilog=None, **kwargs):
        raise NotImplementedError("This is an abstract method")

    ##
    # Convert a normalized regression value back to the original scale
    def invertReg(self, value):
        return value * self.reg_std + self.reg_mean

    ##
    # Convert an integer class value back to the original label values.
    def invertClass(self, value):
        '''Map an integer back into a class label'''
        return self.class_inv[value]
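
VectorClassifier is abstract: trainClassifer and predictValue must be supplied by a subclass, while train and predict handle the label mapping and value normalization around them. As a hedged sketch of that flow, assuming the VectorClassifier excerpt above and its TYPE_* constants are in scope, a hypothetical nearest-mean subclass (not part of PyVision) might look like this:

import numpy as np

class NearestMeanClassifier(VectorClassifier):
    '''Hypothetical subclass: predicts the class whose mean vector is
    closest to the (already normalized) input vector.'''

    def trainClassifer(self, labels, vectors, ilog=None, **kwargs):
        # By the time train() calls this, labels are integer class ids and
        # vectors are already normalized.
        vectors = np.array(vectors, 'd')
        labels = np.array(labels)
        self.means = {c: vectors[labels == c].mean(axis=0)
                      for c in set(labels.tolist())}

    def predictValue(self, data, ilog=None):
        # Return the integer class id; predict() maps it back to the label.
        dists = {c: np.linalg.norm(data - m) for c, m in self.means.items()}
        return min(dists, key=dists.get)

# Illustrative usage (feature_vector_* are placeholders for numpy vectors):
# classifier = NearestMeanClassifier(TYPE_TWOCLASS)
# classifier.addTraining('cat', feature_vector_1)
# classifier.addTraining('dog', feature_vector_2)
# ...
# classifier.train()
# print(classifier.predict(new_feature_vector))
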