Beispiel #1
0
Datei: nbc.py Projekt: awd4/spnss
def max_likelihood_qa(data, nvals, qa, approximate=False, smooth=0.1):
    """Score a cluster assignment by per-cluster maximum-likelihood log scores.

    For every cluster id ``i`` in ``0 .. qa.max()`` the rows of ``data`` with
    ``qa == i`` are selected.  For each column, every non-zero value count
    ``c`` contributes ``c * log(c / cluster_size)``.  The log of the cluster's
    share of all rows (``cluster_size / len(data)``) is then added once.

    Args:
        data: 2-D array of non-negative integer-valued observations
            (rows are instances, columns are variables).
        nvals: Unused by this function; kept for interface compatibility.
        qa: 1-D integer array assigning each row of ``data`` to a cluster id.
        approximate: If true, return the plain sum of the per-cluster scores;
            otherwise combine them with ``logsumexp``.
        smooth: Unused by this function; kept for interface compatibility.

    Returns:
        The combined score as a float.

    NOTE(review): a cluster id in ``0 .. qa.max()`` that owns no rows leads to
    ``log(0.0)``; how that behaves depends on which ``log`` the module
    imports -- confirm callers never pass gaps in the cluster ids.
    """
    nc = qa.max() + 1
    n_rows = len(data)
    sil = np.zeros(nc)
    for i in range(nc):
        # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
        # NumPy 1.24; ``np.bincount`` needs an integer dtype.
        di = data[qa == i].astype(int)
        total = float(len(di))
        si = 0.0
        for j in range(di.shape[1]):
            for c in np.bincount(di[:, j]):
                if c == 0:
                    # Unobserved values contribute nothing (0 * log 0 := 0).
                    continue
                si += c * log(c / total)
        si += log(total / n_rows)  # cluster prior
        sil[i] = si
    return sil.sum() if approximate else logsumexp(sil)
Beispiel #2
0
Datei: nbc.py Projekt: awd4/spnss
def penalized_ll(nbcs, data):
    """Return the mixture log-likelihood of ``data`` minus a size penalty.

    Each model in ``nbcs`` is weighted by ``log(model.size / len(data))``.
    For every row the weighted per-model values returned by
    ``model.fast_ll(row, -inf)`` are combined with ``logsumexp`` and summed
    over all rows.  Finally ``knobs.cluster_penalty * len(nbcs) *
    data.shape[1]`` is subtracted.

    Args:
        nbcs: Sequence of model objects exposing ``size`` and ``fast_ll``.
        data: 2-D array of instances (iterated row by row).

    Returns:
        The penalized log-likelihood.
    """
    n_rows = len(data)
    log_priors = [log(1.0 * model.size / n_rows) for model in nbcs]
    neg_inf = float('-inf')

    total = 0.0
    for row in data:
        weighted = [
            log_prior + model.fast_ll(row, neg_inf)
            for log_prior, model in zip(log_priors, nbcs)
        ]
        total += logsumexp(weighted)

    penalty = knobs.cluster_penalty * len(nbcs) * data.shape[1]
    return total - penalty
Beispiel #3
0
Datei: nbc.py Projekt: awd4/spnss
 def ll(self, inst, best_cll=float('-inf')):
     """Return the smoothed log-likelihood of ``inst`` under this model.

     For each variable ``i`` the term
     ``log((sstats[i][inst[i]] + smoo) / (size + nvals[i] * smoo))``
     is accumulated.

     Args:
         inst: Indexable instance; ``inst[i]`` is used as an index into
             ``self.sstats[i]``.
         best_cll: Cut-off.  As soon as the running sum drops below this
             value the *partial* sum is returned, so the result is only
             exact when it is not below ``best_cll``.

     Returns:
         The accumulated log value (possibly partial, see ``best_cll``).
     """
     sstats = self.sstats
     smoo = self.smoo
     size = self.size
     total = 0.0
     # ``enumerate`` replaces ``xrange``, which does not exist on Python 3.
     for i, nv in enumerate(self.nvals):
         w = sstats[i][inst[i]]
         total += log((w + smoo) / (size + nv * smoo))
         if total < best_cll:
             # Already worse than the caller's best: stop early.
             return total
     return total
Beispiel #4
0
Datei: nbc.py Projekt: awd4/spnss
def nbmix_model(data, nvals, qa, smooth=0.1):
    """Estimate log-parameters of a naive Bayes mixture from an assignment.

    Args:
        data: 2-D array of non-negative integer-valued observations with
            shape ``(m, n)`` (rows are instances, columns are variables).
        nvals: Sequence giving, per column ``j``, how many values ``nvals[j]``
            are considered for that variable.
        qa: 1-D integer array assigning each row of ``data`` to a cluster id
            in ``0 .. qa.max()``.
        smooth: Additive smoothing constant applied to observed counts.

    Returns:
        A tuple ``(lprobs, priors)``:

        * ``lprobs`` has shape ``(n, qa.max() + 1, max(nvals))``;
          ``lprobs[j, i, k]`` is
          ``log((count + smooth) / (cluster_size + smooth * nvals[j]))`` for
          value ``k`` of variable ``j`` in cluster ``i`` and ``-inf`` where
          the count is zero or ``k >= nvals[j]``.
        * ``priors[i]`` is ``log(cluster_size / m)``.

    NOTE(review): values never observed in a cluster keep ``-inf`` rather
    than a smoothed probability -- confirm this is intended.
    """
    # Builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    data = data.astype(int, copy=False)
    nc = qa.max() + 1
    m, n = data.shape[0], data.shape[1]

    # compute params for NB models
    lprobs = np.full((n, nc, max(nvals)), float('-inf'))
    priors = np.zeros(nc)
    for i in range(nc):
        di = data[qa == i]
        di_size = float(len(di))
        priors[i] = log(di_size / m)
        for j in range(n):
            nv = nvals[j]
            denom = di_size + smooth * nv
            bc = np.bincount(di[:, j], minlength=nv)
            for k in range(nv):
                c = bc[k]
                if c == 0:
                    continue
                lprobs[j, i, k] = log((c + smooth) / denom)
    return lprobs, priors
Beispiel #5
0
Datei: nbc.py Projekt: awd4/spnss
def new_ll(nvals, smooth=0.1):
    """Sum ``log((1 + smooth) / (1 + v * smooth))`` over every ``v`` in ``nvals``.

    Args:
        nvals: Iterable of per-variable value counts.
        smooth: Additive smoothing constant.

    Returns:
        The accumulated log value; ``0`` when ``nvals`` is empty.
    """
    numerator = 1. + smooth
    total = 0
    for v in nvals:
        total += log(numerator / (1. + v * smooth))
    return total