import numpy

# Imports assumed from sibling modules in this package - the exact module
# paths may differ in your checkout:
from prob_cat import ProbCat
from kde_inc import KDE_INC
from df import DF, MatrixES, MatrixGrow, Classification, LinearClassifyGen



class ClassifyKDE(ProbCat):
    """A classifier that uses an incremental kernel density estimate model for each category. It
    keeps a 'pseudo-prior', a KDE_INC with an (optionally) larger variance that contains all the
    samples. Categories can be any entities that can index a dictionary."""

    def __init__(self, prec, cap=32, mult=1.0):
        """You provide the precision that is to be used (as a 2D numpy array, so it implicitly
        provides the number of dimensions), the cap on the number of components in the KDE_INC
        objects, and the multiplier for the standard deviation of the components in the
        'pseudo-prior'."""
        self.prec = numpy.array(prec, dtype=numpy.float32)
        self.cap = cap
        self.mult = mult

        # Dividing the precision by mult^2 multiplies the standard deviation of
        # the pseudo-prior's components by mult...
        self.prior = KDE_INC(self.prec / (self.mult * self.mult), self.cap)
        self.cats = dict()  # Dictionary indexed by category, going to the associated KDE_INC object.
        self.counts = None

    def setPrec(self, prec):
        """Changes the precision matrix - must be called before any samples are added, and must
        have the same dimensions as the current one."""
        self.prec = numpy.array(prec, dtype=numpy.float32)
        self.prior.setPrec(self.prec / (self.mult * self.mult))

    def priorAdd(self, sample):
        self.prior.add(sample)

    def add(self, sample, cat):
        if cat not in self.cats:
            self.cats[cat] = KDE_INC(self.prec, self.cap)
        self.cats[cat].add(sample)
        self.counts = None

    def getSampleTotal(self):
        return sum(mm.samples() for mm in self.cats.itervalues())

    def getCatTotal(self):
        return len(self.cats)

    def getCatList(self):
        return self.cats.keys()

    def getCatCounts(self):
        # Cached, as the cache is invalidated whenever a sample is added...
        if self.counts is None:
            self.counts = dict()
            for cat, mm in self.cats.iteritems():
                self.counts[cat] = mm.samples()
        return self.counts

    def getDataProb(self, sample, state=None):
        # The None key holds the pseudo-prior's density estimate...
        ret = dict()
        ret[None] = self.prior.prob(sample)
        for cat, mm in self.cats.iteritems():
            ret[cat] = mm.prob(sample)
        return ret

    def getDataNLL(self, sample, state=None):
        # As getDataProb, but with negative log-likelihoods...
        ret = dict()
        ret[None] = self.prior.nll(sample)
        for cat, mm in self.cats.iteritems():
            ret[cat] = mm.nll(sample)
        return ret
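# A minimal usage sketch for ClassifyKDE - the synthetic data, the identity
# precision matrix and the posterior weighting at the end are all illustrative
# assumptions, not part of the class itself:
def _demo_classify_kde():
    prec = numpy.eye(2, dtype=numpy.float32)  # Unit-variance, axis-aligned kernels.
    model = ClassifyKDE(prec, cap=32, mult=4.0)  # Pseudo-prior kernels 4x wider.

    # Two well-separated synthetic categories; every sample also goes into the
    # pseudo-prior...
    for _ in xrange(64):
        a = numpy.random.multivariate_normal([0.0, 0.0], numpy.eye(2)).astype(numpy.float32)
        b = numpy.random.multivariate_normal([5.0, 5.0], numpy.eye(2)).astype(numpy.float32)
        model.add(a, 'a')
        model.add(b, 'b')
        model.priorAdd(a)
        model.priorAdd(b)

    # getDataProb returns the density of the sample under each category's
    # model, plus the pseudo-prior density under the key None...
    probs = model.getDataProb(numpy.array([0.5, -0.2], dtype=numpy.float32))

    # One (hypothetical) way to get unnormalised posteriors - weight each
    # category's density by its sample count...
    counts = model.getCatCounts()
    weights = dict((cat, probs[cat] * counts[cat]) for cat in counts)
    print(weights)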
class ClassifyDF_KDE(ProbCat):
    """A classifier that uses decision forests, with an incremental kernel density estimate
    (KDE_INC) as a pseudo-prior. The incremental method used is rather simple, but still works
    reasonably well. Provides default parameters for the decision forests, but allows access to
    them in case you want to experiment. Internally the decision forests have two channels - the
    first is the data, the second the class."""

    def __init__(self, prec, cap, treeCount, incAdd=1, testDims=3, dimCount=4, rotCount=32):
        """prec is the precision matrix for the density estimate done with kernel density
        estimation; cap is the component cap for said kernel density estimate. treeCount is how
        many trees to use for the classifying decision forest, whilst incAdd is how many to train
        for each new sample. testDims is the number of dimensions to use for each test, dimCount
        the number of combinations of dimensions to try for generating each node's decision, and
        rotCount the number of orientations to try for each node's test generation."""
        # Support structures...
        self.cats = dict()  # Dictionary from cat to internal indexing number.
        self.treeCount = treeCount
        self.incAdd = incAdd

        # Setup the classification forest - channel 0 is the data, channel 1 the class...
        self.classify = DF()
        self.classify.setInc(True)
        self.classify.setGoal(Classification(None, 1))
        self.classify.setGen(LinearClassifyGen(0, 1, testDims, dimCount, rotCount))

        self.classifyData = MatrixGrow()
        self.classifyTrain = self.treeCount

        # Setup the density estimation kde...
        self.density = KDE_INC(prec, cap)

    def getClassifier(self):
        """Returns the decision forest used for classification."""
        return self.classify

    def getDensityEstimate(self):
        """Returns the KDE_INC used for density estimation, as a pseudo-prior."""
        return self.density

    def priorAdd(self, sample):
        self.density.add(sample)

    def add(self, sample, cat):
        if cat in self.cats:
            c = self.cats[cat]
        else:
            c = len(self.cats)
            self.cats[cat] = c

        self.classifyData.append(numpy.asarray(sample, dtype=numpy.float32), numpy.asarray(c, dtype=numpy.int32).reshape((1,)))
        self.classifyTrain += self.incAdd

    def getSampleTotal(self):
        return self.classifyData.exemplars()

    def getCatTotal(self):
        return len(self.cats)

    def getCatList(self):
        return self.cats.keys()

    def getCatCounts(self):
        if len(self.cats) == 0:
            return dict()
        counts = numpy.bincount(self.classifyData[1, :, 0])

        ret = dict()
        for cat, c in self.cats.iteritems():
            ret[cat] = counts[c] if c < counts.shape[0] else 0
        return ret

    def listMode(self):
        return True

    def getDataProb(self, sample, state=None):
        # Update the model as needed - this will potentially take some time...
        if self.classifyTrain != 0 and self.classifyData.exemplars() != 0:
            self.classify.learn(min(self.classifyTrain, self.treeCount), self.classifyData, clamp=self.treeCount, mp=False)
            self.classifyTrain = 0

        # Generate the result and create and return the right output structure...
        ret = dict()

        if self.classify.size() != 0:
            eval_c = self.classify.evaluate(MatrixES(sample), which='gen')[0]
            for cat, c in self.cats.iteritems():
                ret[cat] = eval_c[c] if c < eval_c.shape[0] else 0.0

        ret[None] = self.density.prob(sample)
        return ret

    def getDataProbList(self, sample, state=None):
        # Update the model as needed - this will potentially take some time...
        if self.classifyTrain != 0 and self.classifyData.exemplars() != 0:
            self.classify.learn(min(self.classifyTrain, self.treeCount), self.classifyData, clamp=self.treeCount, mp=False)
            self.classifyTrain = 0

        # Fetch the required information...
        if self.classify.size() != 0:
            eval_c = self.classify.evaluate(MatrixES(sample), which='gen_list')[0]
        else:
            return [{None: 1.0}]

        eval_d = self.density.prob(sample)

        # Construct and return the output - one dictionary per estimate in the list...
        ret = []
        for ec in eval_c:
            r = {None: eval_d}
            for cat, c in self.cats.iteritems():
                r[cat] = ec[c] if c < ec.shape[0] else 0.0
            ret.append(r)
        return ret
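# A matching usage sketch for ClassifyDF_KDE - as above, the data and
# parameter choices are illustrative assumptions only:
def _demo_classify_df_kde():
    prec = numpy.eye(2, dtype=numpy.float32)
    model = ClassifyDF_KDE(prec, cap=32, treeCount=8)

    for _ in xrange(64):
        a = numpy.random.multivariate_normal([0.0, 0.0], numpy.eye(2)).astype(numpy.float32)
        b = numpy.random.multivariate_normal([5.0, 5.0], numpy.eye(2)).astype(numpy.float32)
        model.add(a, 'a')
        model.add(b, 'b')
        model.priorAdd(a)
        model.priorAdd(b)

    # The forest is trained lazily - the first getDataProb call after new
    # samples have been added triggers the (potentially slow) learn step...
    probs = model.getDataProb(numpy.array([4.8, 5.1], dtype=numpy.float32))
    print(probs)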