예제 #1
0
def initClusters(oo, K, x, mks, t0, t1, Asts, doSepHash=True, xLo=0, xHi=3, oneCluster=False, nzclstr=False):
    """
    Assign an initial cluster label to every spike in Asts and allocate the
    posterior-mode arrays on oo.

    oo          object that receives sp_prmPstMd, sp_hypPstMd, mk_prmPstMd
                and mk_hypPstMd.  oo.epochs is read, and oo.xLo / oo.xHi are
                read when doSepHash is True.
    K           number of mark columns.  The work array has K+1 columns:
                position in column 0, marks in columns 1..K.
    x           positions, indexed with Asts+t0
    mks         marks, indexed with Asts+t0
    t0          offset added to Asts before indexing x and mks
    t1          not used by this function
    Asts        array of spike indices (must support Asts+t0)
    doSepHash   if True, split the spikes with sepHash() into hash and
                non-hash groups and post-process the non-hash labels
                (split, find small clusters, mark them as noise, repack).
                If False all spikes are treated as non-hash.
    xLo, xHi    not used by this function;  sepHash() is given oo.xLo and
                oo.xHi instead
    oneCluster  if True, skip clustering and give every spike label 0
    nzclstr     if True, spikes still labelled -1 in flatlabels are moved to
                label MF-1 and one cluster is removed from the returned
                count (Mwonz = M-1)
                NOTE(review): with oneCluster=True this yields Mwonz == 0,
                i.e. zero-sized arrays on oo -- confirm that is intended.

    Returns (labS, labH, flatlabels, Mwonz, MF, hashthresh, clstrs)
      labS        labels of the non-hash spikes
      labH        labels of the hash spikes
      flatlabels  one label per entry of Asts, in the order of Asts
      Mwonz       number of clusters used to size the arrays put on oo
      MF          clstrs[0] + clstrs[1]  (1 when oneCluster)
      hashthresh  from sepHash(), or the per-column minimum of the marks
                  when no hash separation is done
      clstrs      2-element array, counts for (non-hash, hash)

    Side effects:  always writes the text file "flatlabels" and prints labS
    and labH.  When doSepHash is True (and oneCluster is False) it also
    writes "labSb4", "labS" and "labH", prints diagnostics and adds three
    scatter plots through _plt.
    """
    nSpks = len(Asts)

    #  column 0 is position, columns 1..K are the marks
    _x   = _N.empty((nSpks, K+1))
    _x[:, 0]    = x[Asts+t0]
    _x[:, 1:]   = mks[Asts+t0]

    if oneCluster:
        hashthresh = _N.min(_x[:, 1:], axis=0)   #  no hash spikes

        #  builtin int is used as dtype;  the numpy alias of the same name
        #  no longer exists in current numpy releases
        labS     = _N.zeros(nSpks, dtype=int)
        labH     = _N.array([], dtype=int)
        clstrs   = _N.array([0, 1])
        M        = 1
        MF       = 1
        flatlabels = _N.zeros(nSpks, dtype=int)
    else:
        if not doSepHash:
            unonhash = _N.arange(nSpks)
            hashsp   = _N.array([])
            hashthresh = _N.min(_x[:, 1:], axis=0)   #  no hash spikes
        else:
            unonhash, hashsp, hashthresh = sepHash(_x, BINS=20, blksz=5, xlo=oo.xLo, xhi=oo.xHi)
            #  hashthresh is dim 2

        # len(hashsp)==len(labH)
        # len(unonhash)==len(labS)
        if (len(unonhash) > 0) and (len(hashsp) > 0):
            labS, labH, clstrs = emMKPOS_sep1B(_x[unonhash], _x[hashsp])
        elif len(unonhash) == 0:
            labS, labH, clstrs = emMKPOS_sep1B(None, _x[hashsp], TR=5)
        else:
            labS, labH, clstrs = emMKPOS_sep1B(_x[unonhash], None, TR=5)

        if doSepHash:
            splitclstrs(_x[unonhash], labS)
            posMkCov0(_x[unonhash], labS)

            smallClstrID, spksInSmallClstrs = findsmallclusters(_x[unonhash], labS, K+1)

            print(smallClstrID)
            _N.savetxt("labSb4", labS, fmt="%d")
            #  spikes of small clusters become noise;  -1 first so that the
            #  repacking below is easy
            for nid in smallClstrID:
                ths = _N.where(labS == nid)[0]
                labS[ths] = -1
            _N.savetxt("labS", labS, fmt="%d")

            # 0...clstrs[0]-1     clstrs[0]...clstrs[0]+clstrs[1]-1  (no nz)
            # 0...clstrs[0]-2     clstrs[0]-1...clstrs[0]+clstrs[1]-2  (no nz)
            contiguous_pack2(labS, startAt=-1)

            clstrs[0] = len(_N.unique(labS))
            clstrs[1] = len(_N.unique(labH))

            print("----------")
            print(clstrs)
            print("----------")
            # labS [0...#S]   labH [#S...#S+#H]

            nzspks = _N.where(labS == -1)[0]
            labS[nzspks] = clstrs[0]+clstrs[1]-1   #  highest ID

            contiguous_pack2(labH, startAt=(clstrs[0]-1))
            _N.savetxt("labH", labH, fmt="%d")
            _N.savetxt("labS", labS, fmt="%d")

            #  diagnostic plot:  hash black, non-noise blue, noise red
            nonnz = _N.where(labS < clstrs[0]-1)[0]
            nz    = _N.where(labS == clstrs[0]+clstrs[1]-1)[0]
            _plt.scatter(_x[hashsp, 0], _x[hashsp, 1], color="black")
            _plt.scatter(_x[unonhash[nonnz], 0], _x[unonhash[nonnz], 1], color="blue")
            _plt.scatter(_x[unonhash[nz], 0], _x[unonhash[nz], 1], color="red")

        #  -1 marks a spike that has not received a label
        flatlabels = _N.ones(nSpks, dtype=int)*-1

        #  One pass per distinct label is enough:  visiting every spike's
        #  label repeats the same assignment once per spike.
        for i in _N.unique(labS):
            flatlabels[unonhash[_N.where(labS == i)[0]]] = i
        for i in _N.unique(labH):
            flatlabels[hashsp[_N.where(labH == i)[0]]] = i

        MF     = clstrs[0] + clstrs[1]   #  includes noise
        #  factor 1.3 = 30% head-room on the non-hash count, plus 2 spare
        if nzclstr:
            ths = _N.where(flatlabels == -1)[0]
            flatlabels[ths] = MF - 1
            M = int((clstrs[0]-1) * 1.3 + clstrs[1]) + 2
        else:
            M = int(clstrs[0] * 1.3 + clstrs[1]) + 2
        print("cluters:  %d" % M)

    Mwonz     = M if (nzclstr is False) else M-1
    #####  MODES  - find from the sampling
    oo.sp_prmPstMd = _N.zeros((oo.epochs, 3*Mwonz))   # mode of params
    oo.sp_hypPstMd  = _N.zeros((oo.epochs, (2+2+2)*Mwonz)) # hyperparam
    oo.mk_prmPstMd = [_N.zeros((oo.epochs, Mwonz, K)),
                      _N.zeros((oo.epochs, Mwonz, K, K))]
                      # mode of params
    oo.mk_hypPstMd  = [_N.zeros((oo.epochs, Mwonz, K)),
                       _N.zeros((oo.epochs, Mwonz, K, K)), # hyperparam
                       _N.zeros((oo.epochs, Mwonz, 1)), # hyperparam
                       _N.zeros((oo.epochs, Mwonz, K, K))]

    print(labS)
    print(labH)
    _N.savetxt("flatlabels", flatlabels, fmt="%d")
    ##################

    # flatlabels holds the same labels as labS/labH, in the order of Asts
    return labS, labH, flatlabels, Mwonz, MF, hashthresh, clstrs
예제 #2
0
    def init0(self, pos, mk, n1, n2, sepHash=False, pctH=0.7, MS=None, sepHashMthd=0, doTouchUp=False, MF=None, kmeansinit=True):
        """
        Choose initial cluster labels for the N = n2-n1 spikes and use them
        to fill row 0 of self.smu, self.scov and self.sm, one entry per
        cluster.

        pos, mk      written to column 0 and to columns 1: of an (n2-n1, k)
                     work array, k being self.k
        n1, n2       only their difference is used in the code shown here
        sepHash      if True, split the spikes with _fu.sepHash and label
                     the two groups separately;  otherwise label all spikes
                     together
        pctH         not referenced in the code shown here
        sepHashMthd  when sepHash is True, selects how the rows of x are
                     filled (0, or anything else)
        doTouchUp    not referenced in the code shown here
        kmeansinit   when sepHash is False:  True runs scv.kmeans2 20 times
                     and keeps the labels of the run whose first
                     _fu.kmBIC return value is largest;  False draws
                     integer labels at random in [0, MF)

        M       total number of clusters

        MS      number of clusters assigned to signal
        MF      number of clusters used for initial fit
                (self.M is used when MF is None)
        M - MF  If doing touchup, number of clusters to assign to this
        """
        print "init0"
        oo = self

        k  = oo.k
        MF = oo.M if MF is None else MF

        print "MF  %d" % MF

        #  work array:  column 0 is position, remaining columns are marks
        _x   = _N.empty((n2-n1, k))
        _x[:, 0]    = pos
        _x[:, 1:]   = mk
        N   = n2-n1

        #  Gibbs sampling 
        ################  init cluster centers
        if sepHash:  #  treat hash spikes separately
            ##########################
            #  NOTE(review): bins and blksz are assigned but not used below;
            #  the sepHash call passes the literals 20, -6 and 6 itself.
            BINS    = 20
            bins    = _N.linspace(-6, 6, BINS+1)
            blksz   = 20

            unonhash, hashsp = _fu.sepHash(_x,BINS=BINS,blksz=20,xlo=-6,xhi=6)

            #  NOTE(review): MS defaults to None, in which case this
            #  subtraction raises -- confirm callers always pass MS when
            #  sepHash is True.
            MH        = MF - MS

            sigInds       = unonhash
            smkpos        = _x[sigInds]
            print smkpos

            labS = _fu.spClstrs3MkCl(smkpos)
            #  NOTE(review): MSA is the length of labS.  If labS holds one
            #  label per spike this is a spike count, not a cluster count --
            #  verify against _fu.spClstrs3MkCl.
            MSA  = len(labS)
            if MSA > MS:
                #  MS has not been updated yet, so this recomputes the value
                #  MH already has
                MH        = MF - MS
                MS        = MSA

            labH = _fu.bestcluster(50, _x[hashsp], MH)

            ##################
            #  hash labels first, then signal labels shifted up by MH
            lab        = _N.array(labH.tolist() + (labS + MH).tolist())
            x          = _N.empty((n2-n1, k))
            if sepHashMthd == 0:
                #  NOTE(review): inds is not assigned anywhere in the code
                #  shown here;  unless it is defined elsewhere this branch
                #  raises NameError.
                x[:, 0]    = _x[inds, 0]
                x[:, 1:]   = _x[inds, 1:]
            else:
                #  rows ordered like lab:  hash spikes, then signal spikes
                x[0:len(hashsp)] = _x[hashsp]
                x[len(hashsp):]  = _x[sigInds]
        else:  #  don't separate hash from signal marks. simple kmeans2
            x = _x
            if not kmeansinit:  #  just random initial conditions
                print "random initial conditions"
                lab = _N.array(_N.random.rand(N)*MF, dtype=_N.int)
            else:
                #  repeat kmeans2 ITERS times, remember each run's labels
                #  and the two values returned by kmBIC
                ITERS = 20
                labsAll = []
                mAll    = []
                bics  = _N.empty(ITERS)

                for it in xrange(ITERS):
                    scr, lab = scv.kmeans2(x, MF)
                    _fu.contiguous_pack(lab)
                    bic, K     = _fu.kmBIC(scr, lab, x)
                    bics[it]   = bic
                    mAll.append(K)
                    labsAll.append(lab)

                #  keep the first run that attains the maximum bic;  MF is
                #  replaced by the second kmBIC value of that run
                bestI = _N.where(bics == _N.max(bics))[0][0]
                lab = labsAll[bestI]
                MF  = mAll[bestI]

        #  now assign the cluster we've found to Gaussian mixtures
        #  NOTE(review): SI and dcovMag are not used in the code shown here.
        SI = N / MF
        covAll = _N.cov(x.T)
        dcovMag= _N.diagonal(covAll)*0.005

        for im in xrange(MF):
            kinds = _N.where(lab == im)[0]  #  inds

            if len(kinds) > 6:   # problem when cov is not positive def.
                #  enough members:  use the cluster's own mean and covariance
                oo.smu[0, im]  = _N.mean(x[kinds], axis=0)
                oo.scov[0, im] = _N.cov(x[kinds], rowvar=0)
                oo.sm[0, im]   = float(len(kinds)+1) / (N+MF)
            else:
                #  too few members:  fall back to the mean of all rows of x
                #  and a scaled-down copy of the overall covariance
                #oo.smu[0, im]  = _N.mean(x[sigInds], axis=0)
                oo.smu[0, im]  = _N.mean(x, axis=0)
                oo.scov[0, im] = covAll*0.125
                oo.sm[0, im]   = float(len(kinds)+1) / (N+MF)