Example #1
import numpy

# ProbCat and KDE_INC are assumed to be provided by the surrounding module.


class ClassifyKDE(ProbCat):
    """A classifier that uses an incremental kernel density estimate model for each category. It keeps a 'pseudo-prior', a KDE_INC with an (optionally) larger variance that contains all the samples. Categories can be any hashable object, i.e. anything that can index a dictionary."""

    def __init__(self, prec, cap=32, mult=1.0):
        """You provide the precision that is to be used (As a 2D numpy array, so it implicitly provides the number of dimensions.), the cap on the number of components in the KDE_INC objects and the multiplier for the standard deviation of the components in the 'psuedo-prior'."""
        self.prec = numpy.array(prec, dtype=numpy.float32)
        self.cap = cap
        self.mult = mult

        # Precision is the inverse covariance, so dividing it by mult^2 multiplies the standard deviation by mult.
        self.prior = KDE_INC(self.prec / (self.mult * self.mult), self.cap)
        self.cats = dict()  # Dictionary indexed by category going to the associated KDE_INC object.
        self.counts = None

    def setPrec(self, prec):
        """Changes the precision matrix - must be called before any samples are added, and must have the same dimensions as the current one."""
        self.prec = numpy.array(prec, dtype=numpy.float32)
        self.prior.setPrec(self.prec / (self.mult * self.mult))

    def priorAdd(self, sample):
        """Adds a sample to the pseudo-prior alone, without assigning it to any category."""
        self.prior.add(sample)

    def add(self, sample, cat):
        """Adds a sample with the given category, creating a new per-category KDE_INC for a previously unseen category."""
        if cat not in self.cats:
            self.cats[cat] = KDE_INC(self.prec, self.cap)

        self.cats[cat].add(sample)
        self.counts = None

    def getSampleTotal(self):
        return sum(mm.samples() for mm in self.cats.values())

    def getCatTotal(self):
        return len(self.cats)

    def getCatList(self):
        return list(self.cats.keys())

    def getCatCounts(self):
        if self.counts is None:
            self.counts = dict()
            for cat, mm in self.cats.items():
                self.counts[cat] = mm.samples()

        return self.counts

    def getDataProb(self, sample, state=None):
        """Returns a dictionary indexed by category, going to the probability of the sample under that category's KDE; the pseudo-prior's density is included under the key None."""
        ret = dict()
        ret[None] = self.prior.prob(sample)
        for cat, mm in self.cats.items():
            ret[cat] = mm.prob(sample)
        return ret

    def getDataNLL(self, sample, state=None):
        """Identical to getDataProb, except it returns the negative log-likelihood of each entry instead."""
        ret = dict()
        ret[None] = self.prior.nll(sample)
        for cat, mm in self.cats.items():
            ret[cat] = mm.nll(sample)
        return ret
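
A minimal usage sketch of the class above, assuming a 2D feature space; the covariance, samples and labels are illustrative, not from the original code:

cov = numpy.array([[0.5, 0.0], [0.0, 0.5]], dtype=numpy.float32)
model = ClassifyKDE(numpy.linalg.inv(cov), cap=32, mult=4.0)  # Precision = inverse covariance.

for sample, cat in [([0.1, 0.2], 'a'), ([0.9, 1.1], 'b'), ([0.0, 0.3], 'a')]:
    sample = numpy.array(sample, dtype=numpy.float32)
    model.add(sample, cat)
    model.priorAdd(sample)  # The pseudo-prior sees every sample.

# Maps each category to its density at the query point, with the pseudo-prior under None;
# weighting by getCatCounts() would give unnormalised posteriors via Bayes' rule.
probs = model.getDataProb(numpy.array([0.2, 0.2], dtype=numpy.float32))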
Example #2
import numpy

# DF, Classification, LinearClassifyGen, MatrixGrow, MatrixES, KDE_INC and
# ProbCat are assumed to be provided by the surrounding module.


class ClassifyDF_KDE(ProbCat):
    """A classifier that uses a decision forest, with an incremental kernel density estimate as a pseudo-prior. The incremental training method used is rather simple, but still works reasonably well. Provides default parameters for the decision forest, but allows access to it in case you want to mess around. Internally the decision forest has two channels - the first is the data, the second the class."""
    def __init__(self,
                 prec,
                 cap,
                 treeCount,
                 incAdd=1,
                 testDims=3,
                 dimCount=4,
                 rotCount=32):
        """prec is the precision matrix for the density estimate done with kernel density estimation; cap is the component cap for said kernel density estimate. treeCount is how many trees to use for the classifying decision forest whilst incAdd is how many to train for each new sample. testDims is the number of dimensions to use for each test, dimCount the number of combinations of dimensions to try for generating each nodes decision and rotCount the number of orientations to try for each nodes test generation."""
        # Support structures...
        self.cats = dict()  # Dictionary from cat to internal indexing number.
        self.treeCount = treeCount
        self.incAdd = incAdd

        # Setup the classification forest...
        self.classify = DF()
        self.classify.setInc(True)
        self.classify.setGoal(Classification(None, 1))
        self.classify.setGen(
            LinearClassifyGen(0, 1, testDims, dimCount, rotCount))

        self.classifyData = MatrixGrow()
        self.classifyTrain = self.treeCount  # Number of trees pending training.

        # Setup the density estimation kde...
        self.density = KDE_INC(prec, cap)

    def getClassifier(self):
        """Returns the decision forest used for classification."""
        return self.classify

    def getDensityEstimate(self):
        """Returns the KDE_INC used for density estimation, as a psuedo-prior."""
        return self.density

    def priorAdd(self, sample):
        self.density.add(sample)

    def add(self, sample, cat):
        """Adds a sample with the given category, assigning a new internal index if the category has not been seen before."""
        if cat in self.cats:
            c = self.cats[cat]
        else:
            c = len(self.cats)
            self.cats[cat] = c

        self.classifyData.append(
            numpy.asarray(sample, dtype=numpy.float32),
            numpy.asarray(c, dtype=numpy.int32).reshape((1, )))
        self.classifyTrain += self.incAdd

    def getSampleTotal(self):
        return self.classifyData.exemplars()

    def getCatTotal(self):
        return len(self.cats)

    def getCatList(self):
        return list(self.cats.keys())

    def getCatCounts(self):
        if len(self.cats) == 0: return dict()

        counts = numpy.bincount(self.classifyData[1, :, 0])

        ret = dict()
        for cat, c in self.cats.items():
            ret[cat] = counts[c] if c < counts.shape[0] else 0
        return ret

    def listMode(self):
        """Indicates that this classifier supports the list variants, i.e. getDataProbList."""
        return True

    def getDataProb(self, sample, state=None):
        # Update the model as needed - this will potentially take some time...
        if self.classifyTrain != 0 and self.classifyData.exemplars() != 0:
            self.classify.learn(min(self.classifyTrain, self.treeCount),
                                self.classifyData,
                                clamp=self.treeCount,
                                mp=False)
            self.classifyTrain = 0

        # Generate the result and create and return the right output structure...
        ret = dict()

        if self.classify.size() != 0:
            eval_c = self.classify.evaluate(MatrixES(sample), which='gen')[0]
            for cat, c in self.cats.items():
                ret[cat] = eval_c[c] if c < eval_c.shape[0] else 0.0

        ret[None] = self.density.prob(sample)

        return ret

    def getDataProbList(self, sample, state=None):
        # Update the models as needed - this will potentially take some time...
        if self.classifyTrain != 0 and self.classifyData.exemplars() != 0:
            self.classify.learn(min(self.classifyTrain, self.treeCount),
                                self.classifyData,
                                clamp=self.treeCount,
                                mp=False)
            self.classifyTrain = 0

        # Fetch the required information...
        if self.classify.size() != 0:
            eval_c = self.classify.evaluate(MatrixES(sample),
                                            which='gen_list')[0]
        else:
            return [{None: 1.0}]

        eval_d = self.density.prob(sample)

        # Construct and return the output...
        ret = []

        for ec in eval_c:
            r = {None: eval_d}

            for cat, c in self.cats.items():
                r[cat] = ec[c] if c < ec.shape[0] else 0.0

            ret.append(r)

        return ret
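
A minimal usage sketch of the class above; the precision matrix, tree count, samples and labels are illustrative assumptions, not from the original code:

prec = numpy.identity(2, dtype=numpy.float32)
model = ClassifyDF_KDE(prec, cap=32, treeCount=8)

for sample, cat in [([0.1, 0.2], 'a'), ([0.9, 1.1], 'b')]:
    sample = numpy.array(sample, dtype=numpy.float32)
    model.add(sample, cat)
    model.priorAdd(sample)

# The first query triggers the lazy incremental training of the forest.
probs = model.getDataProb(numpy.array([0.2, 0.2], dtype=numpy.float32))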