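# ---------------------------------------------------------------------------
# Module preamble (assumed): the class below references numpy helpers, the
# pyvision image type, a PCA helper, and several TYPE_*/NORM_* constants that
# are defined elsewhere in the module but not shown in this excerpt.  The
# definitions here are a minimal sketch so the file runs standalone; only the
# names are taken from the code below, and the constant values are
# placeholders that may differ from the library's originals.
# ---------------------------------------------------------------------------
from numpy import array, mean, std

import pyvision as pv
from pyvision.vector.PCA import PCA  # assumed import path for the PCA helper

# Classifier types
TYPE_TWOCLASS   = "TWOCLASS"    # exactly two class labels
TYPE_MULTICLASS = "MULTICLASS"  # two or more class labels
TYPE_REGRESSION = "REGRESSION"  # real-valued labels

# Vector normalization options
NORM_NONE  = "NONE"   # use the raw feature values
NORM_VALUE = "VALUE"  # standardize each feature to zero mean, unit std
NORM_PCA   = "PCA"    # project onto a whitened PCA basis
NORM_AUTO  = "AUTO"   # choose VALUE or PCA based on the vector length

# Regression label normalization
REG_NORM_VALUE = "VALUE"
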
class VectorClassifier:
    ##
    # Configure defaults for the classifier and its value normalization.
    #
    # <p>This configures defaults for the classifier, such as the type of
    # classifier and how values are normalized.
    def __init__(self, classifer_type, normalization=NORM_AUTO, reg_norm=REG_NORM_VALUE, pca_basis=0.95, pca_drop=0):
        # Setup basic configuration
        self.type = classifer_type
        self.norm = normalization
        self.reg_norm = reg_norm
        self.pca_basis = pca_basis
        self.pca_drop = pca_drop

        self.labels = []
        self.vectors = []
        self.vector_length = None

        self.reg_mean = 0.0
        self.reg_std = 1.0

    ##
    # Learn the range of values that are expected for labels and data,
    # then set up for normalization.
    def trainNormalization(self):
        assert len(self.labels) >= 2

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            # Learn the classes: map each distinct label to an integer id
            n_classes = 0
            self.class_map = {}
            for label in self.labels:
                if label not in self.class_map:
                    self.class_map[label] = n_classes
                    n_classes += 1

            if self.type == TYPE_MULTICLASS:
                assert n_classes >= 2
            if self.type == TYPE_TWOCLASS:
                assert n_classes == 2

            # Inverse map from integer id back to the original label
            self.class_inv = {}
            for key, value in self.class_map.items():
                self.class_inv[value] = key

            new_labels = []
            for each in self.labels:
                new_labels.append(self.class_map[each])
            self.labels = new_labels

        if self.type == TYPE_REGRESSION:
            # Standardize regression targets to zero mean and unit std
            self.reg_mean = mean(self.labels)
            self.reg_std = std(self.labels)
            new_labels = []
            for each in self.labels:
                new_labels.append((each - self.reg_mean) / self.reg_std)
            self.labels = new_labels

        # Test that all vectors are one-dimensional and the same length
        shape = self.vectors[0].shape
        assert len(shape) == 1
        for each in self.vectors:
            assert shape == each.shape

        # Create a data matrix
        data = array(self.vectors, 'd')

        if self.norm == NORM_AUTO:
            self.norm = NORM_VALUE
            if data.shape[1] > 128:
                self.norm = NORM_PCA

        # Setup value normalization
        if self.norm == NORM_VALUE:
            self.dmean = data.mean(axis=0)
            self.dstd = data.std(axis=0)
            self.vectors = (data - self.dmean) / self.dstd
        elif self.norm == NORM_PCA:
            self.pca = PCA()
            for vec in self.vectors:
                self.pca.addFeature(vec)
            if self.pca_basis > 1:
                self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
            else:
                self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

            new_vectors = []
            for each in self.vectors:
                new_vectors.append(self.pca.project(each, whiten=True))
            self.vectors = array(new_vectors, 'd')

    ##
    # Normalize the values in a data vector to zero mean, using the
    # normalization learned by {@link trainNormalization}.
    def normalizeVector(self, data):
        if self.norm == NORM_NONE:
            return data
        elif self.norm == NORM_VALUE:
            return (data - self.dmean) / self.dstd
        elif self.norm == NORM_PCA:
            return self.pca.project(data, whiten=True)
        else:
            raise NotImplementedError("Could not determine normalization type: " + self.norm)

    ##
    # Add a training sample.  Data may be a pv.Image or a vector of numbers.
    def addTraining(self, label, data, ilog=None):
        if self.type == TYPE_REGRESSION:
            self.labels.append(float(label))
        else:
            self.labels.append(label)

        if isinstance(data, pv.Image):
            data = data.asMatrix2D().flatten()
        data = array(data, 'd').flatten()
        self.vectors.append(data)

    ##
    # Predict the class or the value for the input data.
    #
    # <p>This function will perform value normalization and then delegate
    # to the subclass to perform classification or regression.
    def predict(self, data, ilog=None):
        if isinstance(data, pv.Image):
            data = data.asMatrix2D().flatten()
        data = array(data, 'd').flatten()

        data = self.normalizeVector(data)
        value = self.predictValue(data, ilog=ilog)

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            return self.invertClass(value)
        if self.type == TYPE_REGRESSION:
            return self.invertReg(value)

    ##
    # Override this method in subclasses.
    #
    # <p>The input is a numpy array of doubles.  For classification the
    # output should be an integer class id; for regression the output
    # should be a float.
    def predictValue(self, data, ilog=None):
        raise NotImplementedError("This is an abstract method")

    ##
    # Train the classifier on the training data.
    #
    # This normalizes the data and the labels, and then passes the
    # results to the subclass for training.
    def train(self, ilog=None, **kwargs):
        self.trainNormalization()

        self.trainClassifer(self.labels, self.vectors, ilog=ilog, **kwargs)

        # Remove training data
        del self.labels
        del self.vectors

    ##
    # This abstract method should be overridden by subclasses.
    #
    # <p>This method is called from {@link train}.  The vectors and labels
    # passed to this method will have been normalized.  This method should
    # train a classifier or regression algorithm for that normalized data.
    #
    # <p>Any keyword arguments passed to {@link train} will also be passed
    # on to this method.  This allows variations in training or verbose
    # output.
    def trainClassifer(self, labels, vectors, ilog=None, **kwargs):
        raise NotImplementedError("This is an abstract method")

    ##
    # Convert a normalized regression value back to the original scale.
    def invertReg(self, value):
        return value * self.reg_std + self.reg_mean

    ##
    # Convert an integer class value back to the original label value.
    def invertClass(self, value):
        '''Map an integer back into a class label'''
        return self.class_inv[value]
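
# ---------------------------------------------------------------------------
# Illustration only (not part of the library): a minimal, hypothetical
# VectorClassifier subclass showing how trainClassifer/predictValue are meant
# to be overridden.  By the time these methods are called, the labels have
# already been mapped to integers 0..n-1 and the vectors have already been
# normalized, so the subclass only deals with clean numeric data.
# ---------------------------------------------------------------------------
class NearestMeanClassifier(VectorClassifier):
    ##
    # A toy classifier: remember the mean vector of each class and predict
    # the class whose mean is closest to the query vector.
    def __init__(self, **kwargs):
        VectorClassifier.__init__(self, TYPE_MULTICLASS, **kwargs)

    def trainClassifer(self, labels, vectors, ilog=None, **kwargs):
        data = array(vectors, 'd')
        self.means = {}
        for c in set(labels):
            rows = [data[i] for i in range(len(labels)) if labels[i] == c]
            self.means[c] = array(rows, 'd').mean(axis=0)

    def predictValue(self, data, ilog=None):
        # Return the integer class id; predict() maps it back to the label.
        best_class, best_dist = None, None
        for c, m in self.means.items():
            dist = ((data - m) ** 2).sum()
            if best_dist is None or dist < best_dist:
                best_class, best_dist = c, dist
        return best_class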
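
# ---------------------------------------------------------------------------
# Usage sketch (assumed workflow, using the hypothetical NearestMeanClassifier
# above): add labeled vectors, train, then predict on new data.  Labels may be
# any hashable values; they are mapped to integers internally and mapped back
# to the original labels by predict().
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    classifier = NearestMeanClassifier(normalization=NORM_VALUE)

    # Two toy classes in a 3-dimensional feature space.
    classifier.addTraining('cat', [1.0, 2.0, 1.0])
    classifier.addTraining('cat', [1.2, 1.8, 0.9])
    classifier.addTraining('dog', [5.0, 0.5, 4.0])
    classifier.addTraining('dog', [4.8, 0.7, 4.2])

    # Normalizes labels/vectors, then delegates to trainClassifer().
    classifier.train()

    # predict() normalizes the query and maps the result back to 'cat'/'dog'.
    print(classifier.predict([1.1, 1.9, 1.0]))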