def _model(x, asn, sampler=_hybrid, alpha=0.1, annealing=None):
    """Build the Bayesian mixture model used for clustering.

    Parameters
    ----------
    x
        Forwarded to ``bmcc.BayesianMixture`` as ``data``.
    asn
        Forwarded to ``bmcc.BayesianMixture`` as ``assignments``.
    sampler
        Forwarded to ``bmcc.BayesianMixture`` as ``sampler``.
    alpha
        ``alpha`` argument of the ``bmcc.DPM`` mixture model.
    annealing
        Forwarded to ``bmcc.BayesianMixture`` as ``annealing``.

    Returns
    -------
    The newly constructed ``bmcc.BayesianMixture``.

    Notes
    -----
    The dimension comes from the module-level ``ds`` object, not from ``x``.
    """
    # NOTE(review): presumably ds.dim matches the dimensionality of x —
    # confirm against the callers.
    dim = ds.dim

    # Normal-Wishart component model: df = dim, scale = sqrt(dim) * I.
    prior_scale = np.identity(dim) * np.sqrt(dim)
    component_model = bmcc.NormalWishart(df=dim, scale=prior_scale)

    # DPM mixture model with use_eb explicitly disabled.
    mixture_model = bmcc.DPM(alpha=alpha, use_eb=False)

    return bmcc.BayesianMixture(
        data=x,
        sampler=sampler,
        component_model=component_model,
        mixture_model=mixture_model,
        annealing=annealing,
        assignments=asn,
        thinning=10,
    )
def run_sample(args):
    """Run MCMC sampling for one (dataset, method) pair and save the history.

    Parameters
    ----------
    args
        Two-element sequence ``(path, method_name)``. ``path`` is the dataset
        file to load and ``method_name`` is a key into ``METHODS``.

    Returns
    -------
    None
        The result is written with ``np.savez`` under the key ``hist``. If
        the destination file already exists the function returns immediately
        without doing any work.

    Notes
    -----
    Any ``Exception`` raised while iterating is printed, not re-raised; the
    history collected up to that point is still saved.
    """
    path, method_name = args

    # Destination: the dataset path with BASE_DIR swapped for
    # RESULT_DIR/<method_name>.
    out_path = path.replace(BASE_DIR, os.path.join(RESULT_DIR, method_name))

    # A result file already on disk means this test was run before.
    if os.path.exists(out_path):
        return

    data = bmcc.GaussianMixture(path, load=True)
    spec = METHODS[method_name]

    # Identity scale matrix only when SCALE_MATRIX is enabled; otherwise
    # None is passed through to NormalWishart.
    if SCALE_MATRIX:
        scale = np.identity(data.d)
    else:
        scale = None
    component_model = bmcc.NormalWishart(df=data.d, scale=scale)
    mixture_model = spec["mixture"](data.k)

    sampler_model = bmcc.BayesianMixture(
        data=data.data,
        sampler=spec["sampler"],
        component_model=component_model,
        mixture_model=mixture_model,
        assignments=np.zeros(data.n).astype(np.uint16),
        thinning=5,
    )

    # Up to 5000 iterations; stop early once the largest assignment label
    # exceeds CLUSTERS_LIMIT.
    try:
        for _ in range(5000):
            sampler_model.iter()
            if np.max(sampler_model.assignments) > CLUSTERS_LIMIT:
                break
    except Exception as err:
        print("Exception in {} / {}:".format(method_name, path))
        print(err)

    # NOTE(review): the parent directory of out_path is not created here —
    # presumably the caller prepares it; confirm.
    np.savez(out_path, hist=sampler_model.hist)
def hybrid(*args, **kwargs): for _ in range(5): bmcc.gibbs(*args, **kwargs) bmcc.split_merge(*args, **kwargs) mm = bmcc.MFM(gamma=1, prior=lambda k: poisson.logpmf(k, 3)) # mm = bmcc.DPM(alpha=1, use_eb=False) cm = bmcc.NormalWishart(df=3) # Create mixture model model = bmcc.BayesianMixture(data=dataset.data, sampler=hybrid, component_model=cm, mixture_model=mm, assignments=np.zeros(POINTS).astype(np.uint16), thinning=THINNING) # Run Iterations start = time.time() for i in tqdm(range(ITERATIONS)): model.iter() print("gibbs_iterate: {:.2f}s [{:.2f} ms/iteration]".format( time.time() - start, (time.time() - start) * 1000 / ITERATIONS)) # Select Least Squares clustering start = time.time() res = model.select_lstsq(burn_in=BURN_IN) res.evaluate(dataset.assignments, oracle=dataset.oracle,
ITERATIONS = 2000
K = 3

# K x K matrix passed to the dataset generator as Q:
# 0.2 + 0.1 on the diagonal, 0.1 everywhere else.
Q = np.identity(K) * 0.2 + np.ones((K, K)) * 0.1

ds = bmcc.StochasticBlockModel(
    n=N,
    k=K,
    r=1,
    a=0.8,
    b=1,
    shuffle=False,
    Q=Q,
)
print(ds.Q)

# plt.matshow(ds.data)
# plt.show()

# The timer starts before model construction, so the elapsed time printed
# below covers both building the model and running every iteration.
start = time.time()

model = bmcc.BayesianMixture(
    data=ds.data,
    sampler=bmcc.gibbs,
    component_model=bmcc.SBM(a=1, b=1),
    # MFM whose prior is the Poisson log-pmf with mean K.
    mixture_model=bmcc.MFM(
        gamma=1,
        prior=lambda k: poisson.logpmf(k, K),
    ),
    # mixture_model=bmcc.DPM(alpha=1),
    assignments=np.zeros(N).astype(np.uint16),
    thinning=1,
)

for _ in tqdm(range(ITERATIONS)):
    model.iter()

print(time.time() - start)

# Least-squares selection with burn_in=1500 (out of ITERATIONS = 2000,
# thinning=1).
res = model.select_lstsq(burn_in=1500)

# 2 x 2 figure: input data (top-left), res.matrix (top-right) and the
# res.num_clusters trace (bottom-left).
fig, axs = plt.subplots(2, 2)
axs[0, 0].matshow(ds.data)
axs[0, 1].matshow(res.matrix)
axs[1, 0].plot(res.num_clusters)