예제 #1
0
    def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
        """
        Set the initial mixture parameters oo.smu[0], oo.scov[0] and oo.sm[0]
        from a first clustering of the N = n2-n1 spikes.

        pos     positions; stored in column 0 of the data matrix
        mk      marks; stored in columns 1: of the data matrix
        n1, n2  only their difference N = n2-n1 (number of spikes) is used
        sepHash if True, hash and signal spikes are clustered separately
        pctH    sepHashMthd 0 only: fraction of spikes (ranked by their
                largest mark channel) treated as hash, and fraction of MF
                given to hash clusters when MS is None
        MS      number of clusters assigned to signal
                (required when sepHashMthd is not 0)
        sepHashMthd
                0: the pctH lowest spikes by largest mark channel are hash
                otherwise: for mark channels 1-4, spikes are taken in blocks
                of 20 in decreasing order of that channel until at most 3 of
                the 20 position bins on [-6, 6] hold fewer than 2 spikes;
                the union of the spikes taken is signal, the rest is hash
        doTouchUp
                not used in this method
        MF      number of clusters used for initial fit (default oo.M)
        kmeansinit
                sepHash False only: True labels spikes with scv.kmeans2,
                False draws labels uniformly at random from 0..MF-1

        M       total number of clusters
        M - MF  If doing touchup, number of clusters to assign to this

        Raises ValueError if sepHash is True, sepHashMthd is not 0 and MS
        is None.
        """
        print("init0")
        oo = self

        k  = oo.k
        MF = oo.M if MF is None else MF

        print("MF  %d" % MF)

        N    = n2-n1
        _x   = _N.empty((N, k))
        _x[:, 0]    = pos
        _x[:, 1:]   = mk

        #  Gibbs sampling
        ################  init cluster centers
        if sepHash:  #  treat hash spikes separately
            print("sepHashMthd  %d" % sepHashMthd)
            if sepHashMthd == 0:
                ##########################
                bgCh  = _N.max(_x[:, 1:], axis=1)
                #  stable ascending order, ties keep their original order
                inds  = _N.argsort(bgCh, kind="mergesort")
                #  hash are lowest pctH (default 70%)
                #  signal are the remaining highest spikes

                pH        = int(bgCh.shape[0]*pctH)
                if MS is None:
                    #  this used to read an undefined name M (NameError);
                    #  MF equals oo.M unless the caller overrides it
                    MH        = int(MF*pctH)
                    MS        = MF - MH
                else:
                    MH        = MF - MS

                sigInds       = inds[pH:]
                smkpos        = _x[sigInds]
                labH = _fu.bestcluster(50, _x[inds[0:pH]], MH)

                labS = _fu.bestcluster(50, smkpos, MS)
            else:  #  sepHashMthd == 1
                ##########################
                if MS is None:
                    #  MF - MS below would otherwise fail with a TypeError
                    #  only after the separation loop has already run
                    raise ValueError("init0: MS must be given when sepHashMthd is not 0")

                BINS    = 20
                bins    = _N.linspace(-6, 6, BINS+1)

                cumcnts = _N.zeros(BINS)

                blksz   = 20

                #####################   separate hash / nonhash indices
                nonhash = []
                #  NOTE(review): channels 1..4 are hard-coded, which looks
                #  like it assumes k == 5 -- confirm before generalizing
                for ch in range(1, 5):
                    #  stable descending order of this mark channel
                    inds  = _N.argsort(-_x[:, ch], kind="mergesort")

                    blk = -1
                    cumcnts[:] = 0
                    done = False

                    while not done:
                        blk += 1
                        cnts, bns = _N.histogram(_x[inds[blk*blksz:(blk+1)*blksz], 0], bins=bins)
                        cumcnts += cnts
                        #  once every spike has been consumed the counts can
                        #  no longer change, so stop instead of looping forever
                        exhausted = (blk+1)*blksz >= N
                        if exhausted or (cumcnts < 2).sum() <= 3:
                            done = True
                            nonhash.extend(inds[0:(blk+1)*blksz])

                unonhash = _N.unique(nonhash)  #  not hash spikes
                hashsp   = _N.setdiff1d(inds, unonhash)  #  inds is contiguous but reordered all

                MH        = MF - MS

                sigInds       = unonhash
                smkpos        = _x[sigInds]

                labS = _fu.spClstrs3MkCl(smkpos)
                MSA  = len(labS)
                #  NOTE(review): MH already equals MF - MS here, so the first
                #  assignment changes nothing, and MS is not read again in
                #  this method; kept as found pending confirmation of intent
                if MSA > MS:
                    MH        = MF - MS
                    MS        = MSA

                labH = _fu.bestcluster(50, _x[hashsp], MH)

            ##################
            #  hash labels first, then signal labels offset by MH;
            #  x is put in the same hash-then-signal row order
            lab        = _N.array(labH.tolist() + (labS + MH).tolist())
            if sepHashMthd == 0:
                x = _x[inds]
            else:
                x = _N.empty((N, k))
                x[0:len(hashsp)] = _x[hashsp]
                x[len(hashsp):]  = _x[sigInds]
        else:  #  don't separate hash from signal marks. simple kmeans2
            x = _x
            if not kmeansinit:  #  just random initial conditions
                print("random initial conditions")
                #  builtin int: the _N.int alias no longer exists in current NumPy
                lab = _N.array(_N.random.rand(N)*MF, dtype=int)
            else:
                scr, lab = scv.kmeans2(x, MF)

        #  now assign the cluster we've found to Gaussian mixtures
        covAll = _N.cov(x.T)

        for im in range(MF):
            kinds = _N.where(lab == im)[0]  #  rows of x labelled im

            if len(kinds) > 6:   # problem when cov is not positive def.
                oo.smu[0, im]  = _N.mean(x[kinds], axis=0)
                oo.scov[0, im] = _N.cov(x[kinds], rowvar=False)
            else:
                #  too few spikes: fall back to the overall mean and a
                #  scaled-down overall covariance
                oo.smu[0, im]  = _N.mean(x, axis=0)
                oo.scov[0, im] = covAll*0.125
            #  add-one smoothed weight; the MF weights sum to 1 when every
            #  label lies in 0..MF-1
            oo.sm[0, im]   = float(len(kinds)+1) / (N+MF)
예제 #2
0
    def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
        """
        Set the initial mixture parameters oo.smu[0], oo.scov[0] and oo.sm[0]
        from a first clustering of the N = n2-n1 spikes.

        pos     positions; stored in column 0 of the data matrix
        mk      marks; stored in columns 1: of the data matrix
        n1, n2  only their difference N = n2-n1 (number of spikes) is used
        sepHash if True, spikes are split into hash / signal by _fu.sepHash
                and the two groups are clustered separately
        pctH    not used in this method
        MS      number of clusters assigned to signal
                (required when sepHash is True)
        sepHashMthd
                not used in this method; the split is always done by
                _fu.sepHash
        doTouchUp
                not used in this method
        MF      number of clusters used for initial fit (default oo.M)
        kmeansinit
                sepHash False only: True runs scv.kmeans2 20 times and keeps
                the run with the largest value returned by _fu.kmBIC,
                False draws labels uniformly at random from 0..MF-1

        M       total number of clusters
        M - MF  If doing touchup, number of clusters to assign to this

        Raises ValueError if sepHash is True and MS is None.
        """
        print("init0")
        oo = self

        k  = oo.k
        MF = oo.M if MF is None else MF

        print("MF  %d" % MF)

        N    = n2-n1
        _x   = _N.empty((N, k))
        _x[:, 0]    = pos
        _x[:, 1:]   = mk

        #  Gibbs sampling
        ################  init cluster centers
        if sepHash:  #  treat hash spikes separately
            if MS is None:
                #  MF - MS below would otherwise fail with a TypeError
                raise ValueError("init0: MS must be given when sepHash is True")

            ##########################
            BINS    = 20
            blksz   = 20

            unonhash, hashsp = _fu.sepHash(_x,BINS=BINS,blksz=blksz,xlo=-6,xhi=6)

            MH        = MF - MS

            sigInds       = unonhash
            smkpos        = _x[sigInds]
            print(smkpos)

            labS = _fu.spClstrs3MkCl(smkpos)
            MSA  = len(labS)
            #  NOTE(review): MH already equals MF - MS here, so the first
            #  assignment changes nothing, and MS is not read again in this
            #  method; kept as found pending confirmation of intent
            if MSA > MS:
                MH        = MF - MS
                MS        = MSA

            labH = _fu.bestcluster(50, _x[hashsp], MH)

            ##################
            #  hash labels first, then signal labels offset by MH
            lab        = _N.array(labH.tolist() + (labS + MH).tolist())
            #  Rows of x must follow the same hash-then-signal order as lab.
            #  The former sepHashMthd == 0 branch indexed with a name (inds)
            #  that is never defined in this method and raised NameError.
            x          = _N.empty((N, k))
            x[0:len(hashsp)] = _x[hashsp]
            x[len(hashsp):]  = _x[sigInds]
        else:  #  don't separate hash from signal marks. simple kmeans2
            x = _x
            if not kmeansinit:  #  just random initial conditions
                print("random initial conditions")
                #  builtin int: the _N.int alias no longer exists in current NumPy
                lab = _N.array(_N.random.rand(N)*MF, dtype=int)
            else:
                #  repeat kmeans2 and keep the run scoring highest under kmBIC
                ITERS = 20
                labsAll = []
                mAll    = []
                bics  = _N.empty(ITERS)

                for it in range(ITERS):
                    scr, lab = scv.kmeans2(x, MF)
                    #  presumably relabels lab in place so that labels are
                    #  contiguous -- TODO confirm against _fu.contiguous_pack
                    _fu.contiguous_pack(lab)
                    bic, K     = _fu.kmBIC(scr, lab, x)
                    bics[it]   = bic
                    mAll.append(K)
                    labsAll.append(lab)

                bestI = int(_N.argmax(bics))  #  first run with the largest score
                lab = labsAll[bestI]
                #  the second value returned by kmBIC for the best run
                #  replaces MF from here on
                MF  = mAll[bestI]

        #  now assign the cluster we've found to Gaussian mixtures
        covAll = _N.cov(x.T)

        for im in range(MF):
            kinds = _N.where(lab == im)[0]  #  rows of x labelled im

            if len(kinds) > 6:   # problem when cov is not positive def.
                oo.smu[0, im]  = _N.mean(x[kinds], axis=0)
                oo.scov[0, im] = _N.cov(x[kinds], rowvar=False)
            else:
                #  too few spikes: fall back to the overall mean and a
                #  scaled-down overall covariance
                oo.smu[0, im]  = _N.mean(x, axis=0)
                oo.scov[0, im] = covAll*0.125
            #  add-one smoothed weight; the MF weights sum to 1 when every
            #  label lies in 0..MF-1
            oo.sm[0, im]   = float(len(kinds)+1) / (N+MF)