예제 #1
0
def _model(x, asn, sampler=_hybrid, alpha=0.1, annealing=None):
    """Build a ``bmcc.BayesianMixture`` over ``x`` with a DPM mixture prior.

    Parameters
    ----------
    x
        Passed to the mixture as ``data``.
    asn
        Passed to the mixture as ``assignments``.
    sampler
        Sampler callable handed to the mixture (defaults to ``_hybrid``).
    alpha
        ``alpha`` argument of ``bmcc.DPM``; ``use_eb`` is always ``False``.
    annealing
        Forwarded unchanged as ``annealing``.

    Returns
    -------
    The constructed ``bmcc.BayesianMixture`` (thinning fixed at 10).

    Notes
    -----
    The Normal-Wishart component prior is sized from the module-level
    ``ds.dim``, not from ``x``: ``df = dim`` and
    ``scale = identity(dim) * sqrt(dim)``.
    """
    dim = ds.dim
    components = bmcc.NormalWishart(
        df=dim, scale=np.identity(dim) * np.sqrt(dim))
    mixture = bmcc.DPM(alpha=alpha, use_eb=False)

    return bmcc.BayesianMixture(
        data=x,
        sampler=sampler,
        component_model=components,
        mixture_model=mixture,
        annealing=annealing,
        assignments=asn,
        thinning=10)
예제 #2
0
def run_sample(args):
    """Run MCMC sampling for one ``(path, method_name)`` work item.

    Parameters
    ----------
    args
        Two-element sequence ``(path, method_name)``: the dataset file to
        load and the key into ``METHODS`` selecting the mixture factory and
        sampler.

    Returns
    -------
    None. The model history is written with ``np.savez`` under the key
    ``hist``. Nothing is done if the destination already exists.

    Notes
    -----
    Any exception raised while iterating is printed rather than propagated,
    and the history of the model at that point is still saved.
    """
    path, method_name = args

    # Destination is the input path with BASE_DIR swapped for
    # RESULT_DIR/<method_name>.
    result_root = os.path.join(RESULT_DIR, method_name)
    dst = path.replace(BASE_DIR, result_root)

    # A present result file means this test already ran; skip it.
    if os.path.exists(dst):
        return

    dataset = bmcc.GaussianMixture(path, load=True)
    method = METHODS[method_name]

    # Component prior: identity scale matrix only when SCALE_MATRIX is set.
    if SCALE_MATRIX:
        scale = np.identity(dataset.d)
    else:
        scale = None
    component_model = bmcc.NormalWishart(df=dataset.d, scale=scale)
    mixture_model = method["mixture"](dataset.k)

    # Every point starts with assignment 0.
    model = bmcc.BayesianMixture(
        data=dataset.data,
        sampler=method["sampler"],
        component_model=component_model,
        mixture_model=mixture_model,
        assignments=np.zeros(dataset.n, dtype=np.uint16),
        thinning=5)

    # Up to 5000 iterations; stop early once the largest assignment index
    # exceeds CLUSTERS_LIMIT.
    try:
        for _ in range(5000):
            model.iter()
            if np.max(model.assignments) > CLUSTERS_LIMIT:
                break
    except Exception as err:
        print("Exception in {} / {}:".format(method_name, path))
        print(err)

    np.savez(dst, hist=model.hist)
예제 #3
0
def hybrid(*args, **kwargs):
    """Sampler step: five ``bmcc.gibbs`` calls, then one ``bmcc.split_merge``.

    All positional and keyword arguments are forwarded unchanged to every
    call. Nothing is returned.
    """
    gibbs_sweeps = 5
    for _sweep in range(gibbs_sweeps):
        bmcc.gibbs(*args, **kwargs)
    bmcc.split_merge(*args, **kwargs)


# Mixture prior: MFM with gamma=1 and a prior on k given by
# poisson.logpmf(k, 3).
mm = bmcc.MFM(gamma=1, prior=lambda k: poisson.logpmf(k, 3))
# Alternative mixture prior (swap in to compare against a DPM):
# mm = bmcc.DPM(alpha=1, use_eb=False)
cm = bmcc.NormalWishart(df=3)

# Create mixture model; every point starts with assignment 0.
model = bmcc.BayesianMixture(data=dataset.data,
                             sampler=hybrid,
                             component_model=cm,
                             mixture_model=mm,
                             assignments=np.zeros(POINTS).astype(np.uint16),
                             thinning=THINNING)

# Run Iterations
start = time.time()
for i in tqdm(range(ITERATIONS)):
    model.iter()
# Read the clock once so the total and the per-iteration figure are derived
# from the same measurement (two separate time.time() calls would disagree).
elapsed = time.time() - start
print("gibbs_iterate: {:.2f}s [{:.2f} ms/iteration]".format(
    elapsed, elapsed * 1000 / ITERATIONS))

# Select Least Squares clustering (timer restarted for this phase)
start = time.time()
res = model.select_lstsq(burn_in=BURN_IN)
res.evaluate(dataset.assignments,
             oracle=dataset.oracle,
예제 #4
0
파일: sbm.py 프로젝트: thetianshuhuang/bmcc
ITERATIONS = 2000
K = 3
# K x K matrix handed to the generator as Q: 0.2 + 0.1 on the diagonal,
# 0.1 everywhere else.
Q = 0.2 * np.eye(K) + np.full((K, K), 0.1)

ds = bmcc.StochasticBlockModel(
    n=N, k=K, r=1, a=0.8, b=1, shuffle=False, Q=Q)
print(ds.Q)

# Optional preview of the generated data matrix:
# plt.matshow(ds.data)
# plt.show()

# The timer covers model construction as well as the sampling loop.
start = time.time()

components = bmcc.SBM(a=1, b=1)
mixture = bmcc.MFM(gamma=1, prior=lambda k: poisson.logpmf(k, K))
# Alternative mixture prior:
# mixture = bmcc.DPM(alpha=1)

# Every point starts with assignment 0; thinning=1 is passed through as-is.
model = bmcc.BayesianMixture(
    data=ds.data,
    sampler=bmcc.gibbs,
    component_model=components,
    mixture_model=mixture,
    assignments=np.zeros(N, dtype=np.uint16),
    thinning=1)

for _ in tqdm(range(ITERATIONS)):
    model.iter()

print(time.time() - start)

# Least-squares clustering selection with burn_in=1500.
res = model.select_lstsq(burn_in=1500)

# 2x2 figure: data matrix, selected result matrix, cluster-count trace.
fig, axs = plt.subplots(2, 2)
axs[0, 0].matshow(ds.data)
axs[0, 1].matshow(res.matrix)
axs[1, 0].plot(res.num_clusters)