from math import log

import numpy as np
from scipy.special import logsumexp


def penalized_ll(nbcs, data):
    # Penalized log-likelihood of a naive-Bayes mixture. Each component in
    # `nbcs` must expose `size` and `fast_ll(inst, default)`; `knobs` is the
    # module-level configuration object that provides `cluster_penalty`.
    ll = 0.0
    # Log prior of each cluster: its share of the data.
    cluster_priors = [log(1.0 * c.size / len(data)) for c in nbcs]
    for inst in data:
        # Joint log-probability of the instance under each cluster,
        # marginalized over clusters via logsumexp.
        vals = [cluster_priors[i] + nbcs[i].fast_ll(inst, float('-inf'))
                for i in range(len(nbcs))]
        ll += logsumexp(vals)
    # Complexity penalty: one term per cluster per feature.
    ll -= knobs.cluster_penalty * len(nbcs) * data.shape[1]
    return ll
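
# The component interface used above is only implied by the call sites; the
# sketch below is a hypothetical stand-in (not the project's real NBC class)
# illustrating the `size` / `fast_ll(inst, default)` contract that
# `penalized_ll` relies on.
class _ToyNBC:
    def __init__(self, counts):
        # counts[j][v]: how often value v was seen for feature j in this cluster.
        self.counts = [np.asarray(c, dtype=float) for c in counts]
        self.size = int(self.counts[0].sum())

    def fast_ll(self, inst, default):
        # Sum of per-feature log-probabilities; `default` is returned whenever
        # a value has zero empirical probability, instead of raising on log(0).
        ll = 0.0
        for j, v in enumerate(inst):
            p = self.counts[j][v] / self.counts[j].sum()
            if p == 0.0:
                return default
            ll += log(p)
        return ll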
def nbmix_likelihood_from_model(data, lprobs, priors):
    # Log-likelihood of `data` under an explicit naive-Bayes mixture:
    # lprobs[j, k, v] is the log-probability of value v for feature j in
    # cluster k, and `priors` holds the per-cluster log priors.
    nc = lprobs[0].shape[0]
    lp = np.zeros(nc)
    ll = 0.0
    for d in data:
        lp[:] = 0.0
        # Accumulate per-cluster log-likelihoods feature by feature.
        for j in range(data.shape[1]):
            lp += lprobs[j, :, d[j]]
        lp += priors
        ll += logsumexp(lp)
    return ll
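
# Minimal usage sketch for nbmix_likelihood_from_model, assuming `lprobs` has
# shape (n_features, n_clusters, n_values); the numbers below are illustrative
# only, not taken from the project.
def _demo_nbmix_likelihood():
    data = np.array([[0, 1], [1, 1], [0, 0]])        # 3 instances, 2 binary features
    probs = np.array([[[0.8, 0.2], [0.3, 0.7]],      # feature 0: cluster 0, cluster 1
                      [[0.6, 0.4], [0.1, 0.9]]])     # feature 1: cluster 0, cluster 1
    priors = np.log(np.array([0.5, 0.5]))            # uniform cluster priors
    return nbmix_likelihood_from_model(data, np.log(probs), priors)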
def max_likelihood_qa(data, nvals, qa, approximate=False, smooth=0.1):
    # Maximum-likelihood score of a hard cluster assignment `qa` (one cluster
    # index per row of `data`), using the empirical per-feature counts within
    # each cluster. `nvals` and `smooth` are accepted but unused in this body.
    nc = qa.max() + 1
    sil = np.zeros(nc)
    for i in range(nc):
        di = data[qa == i].astype(int)   # np.int is removed in recent NumPy
        total = float(len(di))
        si = 0.0
        for j in range(di.shape[1]):
            bc = np.bincount(di[:, j])
            for c in bc:
                if c == 0:
                    continue
                si += c * log(c / total)
        si += log(total / len(data))  # cluster prior
        sil[i] = si
    return sil.sum() if approximate else logsumexp(sil)
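
# Usage sketch for max_likelihood_qa, assuming `qa` holds one cluster index per
# row of `data`; toy values only.
def _demo_max_likelihood_qa():
    data = np.array([[0, 1], [0, 1], [1, 0], [1, 1]])
    nvals = [2, 2]                      # values per feature (unused by the body above)
    qa = np.array([0, 0, 1, 1])         # hard assignment: two clusters of two instances
    return max_likelihood_qa(data, nvals, qa, approximate=True)


if __name__ == "__main__":
    print(_demo_nbmix_likelihood())
    print(_demo_max_likelihood_qa())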