def main(args):
    """Measure how GMM parameter-recovery error varies with sample size.

    For every sample size on a logarithmic grid, a reference ``GMM`` is
    configured from each parameter set produced by
    ``sample_params(args, prng)``, data is drawn from it, and a second ``GMM``
    is fitted to that draw. Absolute differences between reference and fitted
    means, covariances and weights are pooled per sample size and summarised
    as ``(size, mean, std / sqrt(args.reps))``.

    Parameters
    ----------
    args : namespace-like
        Reads ``seed``, ``start``, ``num``, ``base``, ``components`` and
        ``reps``; the object is also forwarded to ``sample_params`` and
        ``plot``.

    Returns
    -------
    numpy.ndarray
        The per-size summaries regrouped so the first axis indexes the
        parameter kind (means, covariances, weights) and the second axis
        indexes the sample size.
    """
    prng = np.random.RandomState(args.seed)
    T = np.logspace(args.start, args.num + 1, base=args.base, num=args.num)

    # Parenthesised single-argument print and an explicit list() behave the
    # same on Python 2 and Python 3 (where map() is lazy).
    print('sample size: %s' % list(map(int, T)))
    sys.stdout.flush()

    # Loop-invariant divisor used for the spread of each summary.
    nsq = np.sqrt(args.reps)

    resid = []
    for t in T:
        mresid, sresid, presid = [], [], []
        for m, s, p in zip(*sample_params(args, prng)):
            gmm = GMM(args.components)
            gmm.means = m
            gmm.covars = s
            gmm.weights = p
            # logspace yields floats; a sample count has to be an integer.
            x = gmm.rvs(int(t))

            gmm2 = GMM(args.components)
            gmm2.fit(x, n_iter=20)

            # identify() is defined elsewhere; presumably it puts components
            # into a canonical order so the two models can be compared
            # element-wise -- TODO confirm.
            m, s, p = identify(m, s, p)
            m2, s2, p2 = identify(gmm2.means, gmm2.covars, gmm2.weights)

            mresid.extend(np.abs(m - m2))
            sresid.extend(np.abs(s - s2))
            presid.extend(np.abs(p - p2))

        mresid = (t, np.mean(mresid), np.std(mresid) / nsq)
        sresid = (t, np.mean(sresid), np.std(sresid) / nsq)
        presid = (t, np.mean(presid), np.std(presid) / nsq)
        resid.append((mresid, sresid, presid))

    # zip() is an iterator on Python 3; materialise it so numpy builds a real
    # numeric array instead of a 0-d object array wrapping the iterator.
    resid = np.asarray(list(zip(*resid)))
    plot(args, resid)
    return resid
def main(ns):
    """Plot the average absolute error of fitted GMM means against sample size.

    For each size in ``ns.sizes`` a two-component ``TGMM`` bounded by
    ``ns.bounds`` (means ``ns.means``, covars ``[1, 1]``, equal weights) is
    sampled ``ns.reps`` times. A ``GMM`` with the module-level ``components``
    count is fitted to every sample, the fitted means are sorted ascending and
    their absolute difference to ``tgmm.means`` is averaged over repetitions.
    One curve per component is then drawn and the figure is shown.

    Parameters
    ----------
    ns : namespace-like
        Reads ``sizes``, ``bounds``, ``means``, ``reps`` and ``loglog``.

    Notes
    -----
    Reads the module-level names ``markers`` and ``components``; it never
    assigns them, so no ``global`` declaration is needed.
    NOTE(review): the fitted means are sorted but ``ns.means`` is used as
    given -- presumably callers pass it in ascending order; confirm.
    """
    res = []
    for s in ns.sizes:
        tgmm = TGMM(2, ns.bounds)
        tgmm.means = ns.means
        tgmm.covars = [1, 1]
        tgmm.weights = [.5, .5]
        allsamples = tgmm.rvs((ns.reps, s))

        tmp = []
        for sample in allsamples:
            gmm = GMM(components)
            # Each sample is reshaped into a single-feature column for fit().
            gmm.fit(sample[:, np.newaxis])
            means = gmm.means.ravel()
            means.sort()
            tmp.append(np.abs(means - tgmm.means))

        # zip() is lazy on Python 3; materialise the (size, error) pairs so
        # np.asarray below can stack them into a numeric array.
        res.append(list(zip([s] * components, np.asarray(tmp).mean(axis=0))))

    # Move the component axis first: res[i] holds the (size, error) rows of
    # component i.
    res = np.asarray(res).swapaxes(0, 1)

    draw = loglog if ns.loglog else plot
    for i in range(components):
        # NOTE(review): hold() was removed in matplotlib 3.0, where holding is
        # always on; kept here to preserve behaviour on older versions.
        hold(1)
        x, y = res[i].T
        # Raw string: '\m' is not a valid escape sequence in a normal literal.
        draw(x, y, ':' + markers[i], c='k', label=r'$\mu_%d$' % (i + 1))

    legend()
    xlabel('sample size')
    ylabel('average absolute error')
    title(r'$\mu_1 = %g,\quad \mu_2 = %g,\quad x\in\left[%g,%g\right]$'
          % (tuple(ns.means) + tuple(ns.bounds)))
    show()
def fit_gmm1(components, x):
    """Fit a Gaussian mixture to one-dimensional data, ordered by mean.

    Parameters
    ----------
    components
        Passed unchanged to the ``GMM`` constructor.
    x : array-like
        Input values; flattened into a single-feature column before fitting.

    Returns
    -------
    tuple
        ``(logL, mu, sd, ws)``: the result of ``score`` on the column data,
        the flattened means in ascending order, and the square roots of the
        flattened covars and the flattened weights in that same order.
    """
    from scikits.learn.mixture import GMM, DPGMM, VBGMM

    column = np.asanyarray(x).reshape(-1, 1)

    # A VBGMM(components, verbose=True, min_covar=0.01) variant was tried
    # here at some point; the plain GMM is what is actually used.
    model = GMM(components)
    model.fit(column)

    def _flat(values):
        # Flatten a fitted model attribute into a 1-D float array.
        return np.array(values, dtype=float).reshape(-1)

    unsorted_means = _flat(model.means)
    ascending = unsorted_means.argsort()

    mu = unsorted_means[ascending]
    sd = _flat(model.covars)[ascending] ** 0.5
    ws = _flat(model.weights)[ascending]

    logL = model.score(column)
    return logL, mu, sd, ws
# Segment a noisy synthetic image by fitting a two-component GMM to its pixels.
# NOTE(review): `n` is read below but not assigned in this fragment; it is
# expected to be defined earlier in the script.
l = 256
im = np.zeros((l, l))

# Switch on n**2 randomly placed pixels, then blur them into blobs.
# The builtin int/float replace np.int/np.float: those were plain aliases of
# the builtins and were removed in NumPy 1.24.
points = l * np.random.random((2, n ** 2))
im[points[0].astype(int), points[1].astype(int)] = 1
im = ndimage.gaussian_filter(im, sigma=l / (4. * n))

# Binary mask of above-average pixels, plus Gaussian noise scaled by 0.3.
mask = (im > im.mean()).astype(float)
img = mask + 0.3 * np.random.randn(*mask.shape)

hist, bin_edges = np.histogram(img, bins=60)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Fit the mixture on the pixel values laid out as a single-feature column.
classif = GMM(n_components=2, cvtype='full')
classif.fit(img.reshape((img.size, 1)))

# Threshold at the mean of the fitted component means.
threshold = np.mean(classif.means)
binary_img = img > threshold

plt.figure(figsize=(11, 4))

plt.subplot(131)
plt.imshow(img)
plt.axis('off')

plt.subplot(132)
plt.plot(bin_centers, hist, lw=2)
# The marker line is drawn at the fixed value 0.5, not at `threshold`.
plt.axvline(0.5, color='r', ls='--', lw=2)
plt.text(0.57, 0.8, 'histogram', fontsize=20, transform=plt.gca().transAxes)
plt.yticks([])
# Segment a noisy synthetic image by fitting a two-component GMM to its pixels.
n = 10
l = 256
im = np.zeros((l, l))

# Switch on n**2 randomly placed pixels, then blur them into blobs.
# The builtin int/float replace np.int/np.float: those were plain aliases of
# the builtins and were removed in NumPy 1.24.
points = l * np.random.random((2, n ** 2))
im[points[0].astype(int), points[1].astype(int)] = 1
im = ndimage.gaussian_filter(im, sigma=l / (4. * n))

# Binary mask of above-average pixels, plus Gaussian noise scaled by 0.3.
mask = (im > im.mean()).astype(float)
img = mask + 0.3 * np.random.randn(*mask.shape)

hist, bin_edges = np.histogram(img, bins=60)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Fit the mixture on the pixel values laid out as a single-feature column.
classif = GMM(n_components=2, cvtype='full')
classif.fit(img.reshape((img.size, 1)))

# Threshold at the mean of the fitted component means.
threshold = np.mean(classif.means)
binary_img = img > threshold

plt.figure(figsize=(11, 4))

plt.subplot(131)
plt.imshow(img)
plt.axis('off')

plt.subplot(132)
plt.plot(bin_centers, hist, lw=2)
# The marker line is drawn at the fixed value 0.5, not at `threshold`.
plt.axvline(0.5, color='r', ls='--', lw=2)
plt.text(0.57, 0.8, 'histogram', fontsize=20, transform=plt.gca().transAxes)
plt.yticks([])

# Select the third panel; whatever is drawn into it lies outside this fragment.
plt.subplot(133)
for i, mu in enumerate(c_mu): A = np.linspace(mu - 10 * model_sigma, mu + 10 * model_sigma, 100) B = 1 / np.sqrt(2 * np.pi * model_sigma**2) * np.exp(-(A - mu)**2 / (2 * model_sigma**2)) B *= pi[i] c = colors[i % len(colors)] plt.plot(A, B, '%s-' % c) plt.hist(X, bins=50, alpha=0.5, facecolor='lightgrey') plt.show() # burn-in #estimated_c_X, estimated_c_mu = estimate_dpm_model( alpha, X, burn_in_iterations, prior_mu, prior_sigma, model_sigma ) gmm = GMM(K, 'full') gmm.fit(X, 0, init_params='wmc') while not gmm.converged_: gmm.fit(X, 40, init_params='') pi = gmm.weights mu = gmm.means sigma = gmm.covars #print 'real components:' #print c_mu #print 'estimated components:' #print estimated_c_mu colors = 'rgbmc' for i, (m, s, p) in enumerate(zip(mu, sigma, pi)):
# Draw one weighted Gaussian curve per entry of c_mu over a histogram of X,
# then fit a GMM to X and keep its parameters.
colors = 'rgbmc'

# These two factors depend only on model_sigma, so compute them once.
gauss_norm = 1 / np.sqrt(2 * np.pi * model_sigma**2)
two_sigma_sq = 2 * model_sigma**2

for i, mu in enumerate(c_mu):
    # Evaluate the density on 100 points spanning +/- 10 sigma around mu.
    half_span = 10 * model_sigma
    A = np.linspace(mu - half_span, mu + half_span, 100)
    B = gauss_norm * np.exp(-(A - mu)**2 / two_sigma_sq)
    # Scale the curve by the i-th entry of pi.
    B *= pi[i]
    # Cycle through the colour codes when there are more curves than colours.
    c = colors[i % len(colors)]
    plt.plot(A, B, '%s-' % c)

plt.hist(X, bins=50, alpha=0.5, facecolor='lightgrey')
plt.show()

# burn-in
# (disabled) estimated_c_X, estimated_c_mu = estimate_dpm_model(
#     alpha, X, burn_in_iterations, prior_mu, prior_sigma, model_sigma)

# The first fit call is made with init_params='wmc' and a second positional
# argument of 0 (presumably the iteration count -- TODO confirm); later calls
# pass 40 with init_params=''.
# NOTE(review): the loop has no upper bound; it repeats until converged_ is set.
gmm = GMM(K, 'full')
gmm.fit(X, 0, init_params='wmc')
while not gmm.converged_:
    gmm.fit(X, 40, init_params='')

# From here on pi, mu and sigma hold the fitted parameters, replacing the
# values used for the curves above.
pi = gmm.weights
mu = gmm.means
sigma = gmm.covars

# (disabled) print 'real components:'
# (disabled) print c_mu
# (disabled) print 'estimated components:'
# (disabled) print estimated_c_mu

colors = 'rgbmc'