def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
    """
    Seed the mixture parameters (smu, scov, sm) from an initial clustering.

    The position vector and the marks are stacked into an (n2-n1, k) matrix
    (column 0 = pos, columns 1: = mk), every row is given a cluster label,
    and for each label im in 0..MF-1 the method writes
    self.smu[0, im], self.scov[0, im] and self.sm[0, im].

    pos         values copied into column 0 of the data matrix
    mk          values copied into columns 1..k-1 of the data matrix
    n1, n2      only n2-n1 (the number of rows, N) is used
    sepHash     if True, cluster "hash" rows and "signal" rows separately
    pctH        sepHashMthd == 0: fraction of rows (lowest max-mark)
                treated as hash.  Also the fraction of MF given to hash
                clusters when MS is None
    MS          number of clusters assigned to signal.  If None it is
                derived as MF - int(MF*pctH)
    sepHashMthd 0: split rows by rank of their largest mark
                otherwise: split by the block-scan over mark columns
    doTouchUp   not used by this method
    MF          number of clusters used for initial fit (default self.M)
    kmeansinit  only used when sepHash is False: True -> scv.kmeans2
                labels, False -> uniformly random labels
    """
    print("init0")
    oo = self
    k = oo.k
    MF = oo.M if MF is None else MF
    print("MF %d" % MF)

    N = n2 - n1
    _x = _N.empty((N, k))
    _x[:, 0] = pos
    _x[:, 1:] = mk

    ################   init cluster centers
    if sepHash:  # treat hash spikes separately
        print("sepHashMthd %d" % sepHashMthd)

        # Split the MF clusters between hash (MH) and signal (MS).
        # The original referenced an undefined name `M` here; MF is used so
        # that MH + MS == MF always holds.
        if MS is None:
            MH = int(MF*pctH)
            MS = MF - MH
        else:
            MH = MF - MS

        if sepHashMthd == 0:
            ##########################
            # Rank rows by their largest mark (ascending, stable):
            # the lowest pctH fraction is hash, the rest is signal.
            bgCh = _N.max(_x[:, 1:], axis=1)
            inds = _N.argsort(bgCh, kind="mergesort")
            pH = int(bgCh.shape[0]*pctH)

            sigInds = inds[pH:]
            smkpos = _x[sigInds]
            labH = _fu.bestcluster(50, _x[inds[0:pH]], MH)
            labS = _fu.bestcluster(50, smkpos, MS)

            # rows reordered hash-first so they line up with `lab` below
            x = _x[inds]
        else:  # sepHashMthd == 1
            ##########################
            BINS = 20
            bins = _N.linspace(-6, 6, BINS+1)
            cumcnts = _N.zeros(BINS)
            blksz = 20

            #####################  separate hash / nonhash indices
            nonhash = []
            # NOTE(review): scans columns 1..4 regardless of k -- confirm
            # that k == 5 is always the case here.
            for ch in range(1, 5):
                # rows in descending order of this column (stable on ties)
                inds = _N.argsort(-_x[:, ch], kind="mergesort")
                cumcnts[:] = 0
                blk = -1
                while True:
                    blk += 1
                    cnts, _ = _N.histogram(_x[inds[blk*blksz:(blk+1)*blksz], 0], bins=bins)
                    cumcnts += cnts
                    # stop once at most 3 position bins hold fewer than 2 rows
                    covered = len(_N.where(cumcnts < 2)[0]) <= 3
                    # ...or when every row has been consumed; without this
                    # the loop would spin forever on empty slices
                    exhausted = (blk+1)*blksz >= N
                    if covered or exhausted:
                        break
                nonhash.extend(inds[0:(blk+1)*blksz])

            unonhash = _N.unique(nonhash)   # not hash spikes
            # every row index not flagged as non-hash
            hashsp = _N.setdiff1d(_N.arange(N), unonhash)

            sigInds = unonhash
            smkpos = _x[sigInds]
            labS = _fu.spClstrs3MkCl(smkpos)
            MSA = len(labS)
            # NOTE(review): MH is recomputed from the *old* MS, so this
            # branch leaves MH unchanged; MS is not read again afterwards.
            # Possibly the two assignments were meant to be swapped.
            if MSA > MS:
                MH = MF - MS
                MS = MSA
            labH = _fu.bestcluster(50, _x[hashsp], MH)

            # rows reordered hash-first so they line up with `lab` below
            x = _N.concatenate((_x[hashsp], _x[sigInds]), axis=0)

        ##################
        # signal labels are shifted by MH so hash labels come first
        lab = _N.array(labH.tolist() + (labS + MH).tolist())
    else:  # don't separate hash from signal marks.  simple kmeans2
        x = _x
        if not kmeansinit:  # just random initial conditions
            print("random initial conditions")
            # builtin int: the _N.int alias no longer exists in recent numpy
            lab = _N.array(_N.random.rand(N)*MF, dtype=int)
        else:
            scr, lab = scv.kmeans2(x, MF)

    #  now assign the cluster we've found to Gaussian mixtures
    covAll = _N.cov(x.T)
    for im in range(MF):
        kinds = _N.where(lab == im)[0]  # rows carrying label im

        # weight: member count with +1 per cluster, normalized by N+MF
        oo.sm[0, im] = float(len(kinds)+1) / (N+MF)
        if len(kinds) > 6:  # problem when cov is not positive def.
            oo.smu[0, im] = _N.mean(x[kinds], axis=0)
            oo.scov[0, im] = _N.cov(x[kinds], rowvar=0)
        else:
            # too few rows: fall back to the global mean and a shrunken
            # copy of the global covariance
            oo.smu[0, im] = _N.mean(x, axis=0)
            oo.scov[0, im] = covAll*0.125
def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
    """
    Seed the mixture parameters (smu, scov, sm) from an initial clustering.

    The position vector and the marks are stacked into an (n2-n1, k) matrix
    (column 0 = pos, columns 1: = mk), every row is given a cluster label,
    and for each label im in 0..MF-1 the method writes
    self.smu[0, im], self.scov[0, im] and self.sm[0, im].

    pos         values copied into column 0 of the data matrix
    mk          values copied into columns 1..k-1 of the data matrix
    n1, n2      only n2-n1 (the number of rows, N) is used
    sepHash     if True, rows are split by _fu.sepHash into hash and
                non-hash sets which are clustered separately
    pctH        only used when sepHash is True and MS is None: fraction
                of MF given to hash clusters
    MS          number of clusters assigned to signal.  If None it is
                derived as MF - int(MF*pctH)
    sepHashMthd not used by this method (kept for call compatibility)
    doTouchUp   not used by this method
    MF          number of clusters used for initial fit (default self.M).
                With sepHash False and kmeansinit True it is replaced by
                the cluster count reported for the best k-means run
    kmeansinit  only used when sepHash is False: True -> best of several
                scv.kmeans2 runs, False -> uniformly random labels
    """
    print("init0")
    oo = self
    k = oo.k
    MF = oo.M if MF is None else MF
    print("MF %d" % MF)

    N = n2 - n1
    _x = _N.empty((N, k))
    _x[:, 0] = pos
    _x[:, 1:] = mk

    ################   init cluster centers
    if sepHash:  # treat hash spikes separately
        ##########################
        BINS = 20
        blksz = 20
        unonhash, hashsp = _fu.sepHash(_x, BINS=BINS, blksz=blksz, xlo=-6, xhi=6)

        # Split the MF clusters between hash (MH) and signal (MS).  With
        # the default MS=None the subtraction below used to fail.
        if MS is None:
            MH = int(MF*pctH)
            MS = MF - MH
        else:
            MH = MF - MS

        sigInds = unonhash
        smkpos = _x[sigInds]
        print(smkpos)
        labS = _fu.spClstrs3MkCl(smkpos)
        MSA = len(labS)
        # NOTE(review): MH is recomputed from the *old* MS, so this branch
        # leaves MH unchanged; MS is not read again afterwards.  Possibly
        # the two assignments were meant to be swapped.
        if MSA > MS:
            MH = MF - MS
            MS = MSA
        labH = _fu.bestcluster(50, _x[hashsp], MH)

        ##################
        # signal labels are shifted by MH so hash labels come first
        lab = _N.array(labH.tolist() + (labS + MH).tolist())

        # Rows must follow the same hash-then-signal order as `lab`.
        # The original branched on sepHashMthd and, for the default
        # value 0, indexed with a name (`inds`) that is never defined here.
        x = _N.empty((N, k))
        x[0:len(hashsp)] = _x[hashsp]
        x[len(hashsp):] = _x[sigInds]
    else:  # don't separate hash from signal marks.  simple kmeans2
        x = _x
        if not kmeansinit:  # just random initial conditions
            print("random initial conditions")
            # builtin int: the _N.int alias no longer exists in recent numpy
            lab = _N.array(_N.random.rand(N)*MF, dtype=int)
        else:
            # run k-means several times and keep the run with the largest
            # score returned by _fu.kmBIC
            ITERS = 20
            labsAll = []
            mAll = []
            bics = _N.empty(ITERS)

            for it in range(ITERS):
                scr, lab = scv.kmeans2(x, MF)
                # return value ignored -- presumably relabels `lab` in
                # place so the labels are consecutive; confirm in _fu
                _fu.contiguous_pack(lab)
                bic, K = _fu.kmBIC(scr, lab, x)
                bics[it] = bic
                mAll.append(K)
                labsAll.append(lab)

            bestI = _N.where(bics == _N.max(bics))[0][0]
            lab = labsAll[bestI]
            MF = mAll[bestI]

    #  now assign the cluster we've found to Gaussian mixtures
    covAll = _N.cov(x.T)
    for im in range(MF):
        kinds = _N.where(lab == im)[0]  # rows carrying label im

        # weight: member count with +1 per cluster, normalized by N+MF
        oo.sm[0, im] = float(len(kinds)+1) / (N+MF)
        if len(kinds) > 6:  # problem when cov is not positive def.
            oo.smu[0, im] = _N.mean(x[kinds], axis=0)
            oo.scov[0, im] = _N.cov(x[kinds], rowvar=0)
        else:
            # too few rows: fall back to the global mean and a shrunken
            # copy of the global covariance
            oo.smu[0, im] = _N.mean(x, axis=0)
            oo.scov[0, im] = covAll*0.125