コード例 #1
0
ファイル: classify_dpgmm.py プロジェクト: zoginni/helit
  def __init__(self, dims, runs = 1):
    """Creates the classifier. dims gives the dimensionality of the input vectors; runs sets how many random starting points each variational solve converges from. More runs reduce the risk of getting stuck in a local minimum, at extra computational cost, though the first attempt usually suffices - this is only for the paranoid."""
    self.dims = dims
    self.runs = runs

    # Incremental learning is on by default - models are re-solved on each add.
    self.inc = True

    # One DPGMM serves as a pseudo-prior; per-category models live in a dict.
    self.prior = DPGMM(self.dims)
    self.cats = dict() # Maps each category to its associated DPGMM object.
    self.counts = None # Lazily-built cache of per-category sample counts.
コード例 #2
0
ファイル: classify_dpgmm.py プロジェクト: zoginni/helit
  def add(self, sample, cat):
    """Adds a training sample under the given category, creating the category's model on first use and, when incremental learning is enabled, re-solving that model."""
    mm = self.cats.get(cat)
    if mm is None:
      mm = DPGMM(self.dims)
      self.cats[cat] = mm

    mm.add(sample)
    if self.inc and mm.setPrior():
      self.cats[cat] = mm.multiGrowSolve(self.runs)

    # Cached counts are stale now - drop them.
    self.counts = None
コード例 #3
0
  def __init__(self, dims, runs = 1):
    """Constructor. dims is the dimensionality of input vectors. runs is the number of starting points each variational run converges from - raising it trades computation for a lower chance of a local minimum, though convergence is usually fine on the first attempt, so this is only for the paranoid."""
    # Basic configuration.
    self.dims = dims
    self.runs = runs
    self.inc = True # Incremental solving enabled until told otherwise.

    # Model state: a pseudo-prior DPGMM plus one DPGMM per category.
    self.prior = DPGMM(self.dims)
    self.cats = dict() # Dictionary indexed by category going to the associated DPGMM object.
    self.counts = None # Cache for getCatCounts; None means not yet computed.
コード例 #4
0
class ClassifyDPGMM(ProbCat):
  """A classifier that uses a Dirichlet process Gaussian mixture model (DPGMM) for each category. Also includes a pseudo-prior in the form of an extra DPGMM that you can feed. Trains them incrementally, increasing the mixture component cap when that results in an improvement in model performance. Be aware that whilst this is awesome its memory consumption can be fierce, and it is a computational hog. Includes the ability to switch off incremental learning, which can save some time if you are not using the model between training samples."""
  def __init__(self, dims, runs = 1):
    """dims is the number of dimensions the input vectors have, whilst runs is how many starting points to converge from for each variational run. Increasing runs helps to avoid local minima at the expense of computation, but as it often converges well enough with the first attempt, this is only for the paranoid."""
    self.dims = dims
    self.runs = runs

    self.inc = True # When True each add triggers a re-solve of the affected model.

    self.prior = DPGMM(self.dims) # Pseudo-prior model, fed via priorAdd.
    self.cats = dict() # Dictionary indexed by category going to the associated DPGMM object.
    self.counts = None # Lazy cache of per-category sample counts; invalidated on add.


  def priorAdd(self, sample):
    """Adds a sample to the pseudo-prior DPGMM, re-solving it if incremental learning is on."""
    self.prior.add(sample)
    if self.inc and self.prior.setPrior():
      self.prior = self.prior.multiGrowSolve(self.runs)

  def add(self, sample, cat):
    """Adds a sample for category cat, creating the category's DPGMM on first use."""
    if cat not in self.cats: self.cats[cat] = DPGMM(self.dims)

    self.cats[cat].add(sample)
    if self.inc and self.cats[cat].setPrior():
      self.cats[cat] = self.cats[cat].multiGrowSolve(self.runs)

    self.counts = None # Invalidate the cached counts.


  def setInc(self, state):
    """With a state of False it disables incremental learning until further notice, with a state of True it reenables it, and makes sure that it is fully up to date by updating everything. Note that when reenabled it assumes that enough data is available, and will crash if not, unlike the incremental approach that just twiddles its thumbs - in a sense this is safer if you want to avoid bad results."""
    self.inc = state

    if self.inc:
      # Bring the pseudo-prior and every per-category model up to date.
      self.prior.setPrior()
      self.prior = self.prior.multiGrowSolve(self.runs)

      # Iterate the dict directly rather than iterkeys() - works on Python 2 and 3.
      for cat in self.cats:
        self.cats[cat].setPrior()
        self.cats[cat] = self.cats[cat].multiGrowSolve(self.runs)


  def getSampleTotal(self):
    """Returns the total number of samples added across all categories (excludes the pseudo-prior)."""
    # Bug fix: the original computed the sum but omitted the return, always yielding None.
    return sum(mm.size() for mm in self.cats.values())


  def getCatTotal(self):
    """Returns the number of categories seen so far."""
    return len(self.cats)

  def getCatList(self):
    """Returns a list of the known categories."""
    return list(self.cats.keys())

  def getCatCounts(self):
    """Returns a dictionary mapping each category to its sample count, cached until the next add."""
    if self.counts is None: # 'is None', not '==None' - identity test is the correct idiom.
      self.counts = dict()
      for cat, mm in self.cats.items():
        self.counts[cat] = mm.size()

    return self.counts


  def getDataProb(self, sample, state = None):
    """Returns a dictionary mapping each category to the probability of the sample under that category's model."""
    ret = dict()
    for cat, mm in self.cats.items(): ret[cat] = mm.prob(sample)
    return ret
コード例 #5
0
ファイル: classify_dpgmm.py プロジェクト: zoginni/helit
class ClassifyDPGMM(ProbCat):
  """A classifier that uses a Dirichlet process Gaussian mixture model (DPGMM) for each category. Also includes a pseudo-prior in the form of an extra DPGMM that you can feed. Trains them incrementally, increasing the mixture component cap when that results in an improvement in model performance. Be aware that whilst this is awesome its memory consumption can be fierce, and it is a computational hog. Includes the ability to switch off incremental learning, which can save some time if you are not using the model between training samples."""
  def __init__(self, dims, runs = 1):
    """dims is the number of dimensions the input vectors have, whilst runs is how many starting points to converge from for each variational run. Increasing runs helps to avoid local minima at the expense of computation, but as it often converges well enough with the first attempt, this is only for the paranoid."""
    self.dims = dims
    self.runs = runs

    self.inc = True # When True each add triggers a re-solve of the affected model.

    self.prior = DPGMM(self.dims) # Pseudo-prior model, fed via priorAdd.
    self.cats = dict() # Dictionary indexed by category going to the associated DPGMM object.
    self.counts = None # Lazy cache of per-category sample counts; invalidated on add.


  def priorAdd(self, sample):
    """Adds a sample to the pseudo-prior DPGMM, re-solving it if incremental learning is on."""
    self.prior.add(sample)
    if self.inc and self.prior.setPrior():
      self.prior = self.prior.multiGrowSolve(self.runs)

  def add(self, sample, cat):
    """Adds a sample for category cat, creating the category's DPGMM on first use."""
    if cat not in self.cats: self.cats[cat] = DPGMM(self.dims)

    self.cats[cat].add(sample)
    if self.inc and self.cats[cat].setPrior():
      self.cats[cat] = self.cats[cat].multiGrowSolve(self.runs)

    self.counts = None # Invalidate the cached counts.


  def setInc(self, state):
    """With a state of False it disables incremental learning until further notice, with a state of True it reenables it, and makes sure that it is fully up to date by updating everything. Note that when reenabled it assumes that enough data is available, and will crash if not, unlike the incremental approach that just twiddles its thumbs - in a sense this is safer if you want to avoid bad results."""
    self.inc = state

    if self.inc:
      # Bring the pseudo-prior and every per-category model up to date.
      self.prior.setPrior()
      self.prior = self.prior.multiGrowSolve(self.runs)

      # Iterate the dict directly rather than iterkeys() - works on Python 2 and 3.
      for cat in self.cats:
        self.cats[cat].setPrior()
        self.cats[cat] = self.cats[cat].multiGrowSolve(self.runs)


  def getSampleTotal(self):
    """Returns the total number of samples added across all categories (excludes the pseudo-prior)."""
    # Bug fix: the original computed the sum but omitted the return, always yielding None.
    return sum(mm.size() for mm in self.cats.values())


  def getCatTotal(self):
    """Returns the number of categories seen so far."""
    return len(self.cats)

  def getCatList(self):
    """Returns a list of the known categories."""
    return list(self.cats.keys())

  def getCatCounts(self):
    """Returns a dictionary mapping each category to its sample count, cached until the next add."""
    if self.counts is None: # 'is None', not '==None' - identity test is the correct idiom.
      self.counts = dict()
      for cat, mm in self.cats.items():
        self.counts[cat] = mm.size()

    return self.counts


  def getDataProb(self, sample, state = None):
    """Returns a dictionary mapping each category to the probability of the sample under that category's model."""
    ret = dict()
    for cat, mm in self.cats.items(): ret[cat] = mm.prob(sample)
    return ret
コード例 #6
0
ファイル: testDPGMM.py プロジェクト: hjanime/CSI
if __name__ == '__main__':
    color_iter = itertools.cycle(['k','r','g','b','c','m','y'])
    FILTER_VAL = [99999,100000,1000000]
    fwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Forward.wig', smooth=False)
    rwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Reverse.wig', smooth=False)
    peaks = Peak.objects.filter(run=9).order_by('-size')
    
    for i in range(10):
        print peaks[i]
        for filter_val in FILTER_VAL:
            
            if peaks[i].strand == '+':
                axis_x, x, orig_y = getPeakWig(fwig, peaks[i])
            else:
                axis_x, x, orig_y = getPeakWig(rwig, peaks[i])
            model = DPGMM(1)
            skmodel = mixture.DPGMM(n_components=8,alpha=32,n_iter=10000)
            min_x = axis_x[0]
            axis_x = axis_x - min_x
            x = x-min_x
            data = []
            print min_x
            for v in x:
                model.add([v])
                data.append(v)
            #print model.data
            skmodel.fit(data)
            print "data mean: ", np.average(x)
            print "SK means: ",skmodel.means_, ' ', skmodel._get_covars(), ' ', skmodel.bic(x),' ', skmodel.converged_
            model.setPrior()
            print 'start'