예제 #1
0
파일: cluster.py 프로젝트: whitews/fcm
    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Fit an HDPNormalMixture jointly to several datasets.

        Every dataset is standardized with the mean and standard deviation
        of the pooled (row-stacked) data before sampling; those statistics
        are kept on ``self.m`` and ``self.s``.

        Parameters
        ----------
        datasets : FCMCollection or sequence of 2-D array-likes
            An FCMCollection is first converted with ``to_list()``. All
            datasets must have the same number of columns.
        verbose : passed unchanged to ``HDPNormalMixture(verbose=...)``.
        tune_interval : passed unchanged to ``HDPNormalMixture.sample``.

        Returns
        -------
        Whatever ``self.get_results()`` returns after sampling.

        Raises
        ------
        RuntimeError
            If the datasets do not all share the column count of the
            first dataset.
        """
        if isinstance(datasets, FCMCollection):
            datasets = datasets.to_list()
        self.d = datasets[0].shape[1]

        # Validate widths *before* stacking: vstack itself fails on
        # mismatched column counts, so checking afterwards (as this method
        # used to) could never raise the intended error.
        for data in datasets:
            if data.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        # Work on double-precision copies so the caller's data is untouched.
        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled per-column statistics across all datasets.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        # NOTE(review): a constant column gives self.s == 0 and therefore
        # inf/nan values below -- confirm whether inputs can contain one.
        standardized = [(i - self.m) / self.s for i in datasets]

        # Let the helpers prepare user-supplied priors, if any were given.
        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        # Seed the random number generator: use the configured seed when
        # present, otherwise fall back to the current clock microseconds.
        if self.seed is not None:
            seed(self.seed)
        else:
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(standardized,
                                    ncomp=self.nclusts,
                                    gamma0=self.gamma0,
                                    m0=self.m0,
                                    nu0=self.nu0,
                                    Phi0=self.Phi0,
                                    e0=self.e0,
                                    f0=self.f0,
                                    g0=self.g0,
                                    h0=self.h0,
                                    mu0=self._prior_mu,
                                    Sigma0=self._prior_sigma,
                                    weights0=self._prior_pi,
                                    alpha0=self.alpha0,
                                    gpu=self.device,
                                    parallel=self.parallel,
                                    verbose=verbose)
        self.hdp.sample(niter=self.niter,
                        nburn=self.burnin,
                        thin=1,
                        ident=self.ident,
                        tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()
예제 #2
0
파일: test_hdp.py 프로젝트: jethrotan/dpmix
from dpmix import HDPNormalMixture

#import gpustats as gs

if __name__ == '__main__':
    # Smoke run: sample an HDP mixture on synthetic data split into J
    # groups, then continue from that sampler with identification enabled.
    # NOTE(review): `generate_data` and `np` are not defined in the visible
    # part of this file -- they must come from imports outside this excerpt.

    N = int(1e5)
    K = 2
    J = 4
    ncomps = 3
    true_labels, data = generate_data(n=N, k=K, ncomps=ncomps)

    # Standardize each column to zero mean and unit standard deviation.
    data = data - data.mean(0)
    data = data / data.std(0)

    # Shuffle the rows, then cut them into J equally sized groups.
    ind = np.arange(N)
    np.random.shuffle(ind)
    all_data = data[ind].copy()
    # Floor division keeps the slice bounds integral on Python 3 as well;
    # plain `/` would produce floats there and fail when slicing.
    chunk = N // J
    data = [all_data[(chunk * i):(chunk * (i + 1))].copy() for i in range(J)]

    mcmc = HDPNormalMixture(data,
                            ncomp=100,
                            gpu=[0, 1, 2],
                            parallel=True,
                            verbose=100)
    mcmc.sample(2, nburn=5, tune_interval=100)

    # Second sampler is constructed from the first one and run with
    # ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(2, nburn=0, ident=True)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(imcmc.mu[-1])
    print(imcmc.weights[-1])
    print(imcmc.beta[-1])
예제 #3
0
import numpy.random as npr
from dpmix import HDPNormalMixture

if __name__ == '__main__':
    # Repeatedly builds and samples HDP mixtures on fresh uniform random
    # data, deleting the samplers after each round.
    # NOTE(review): presumably a stress / resource-release check -- confirm.
    import sys

    nclust = 256
    niter = 10
    burnin = 10
    device = 1
    max_events = 50000
    num_files = 10

    seed = 9
    # Uncomment to make the generated data reproducible.
    #npr.seed(seed)
    for it in range(10, 20):
        xs = []
        for i in range(num_files):
            # Progress indicator: space-separated indices on one line.
            # Written through sys.stdout so it works on Python 2 and 3 and
            # matches the output of the former `print i,` statement.
            sys.stdout.write(('' if i == 0 else ' ') + str(i))
            xs.append(npr.uniform(-5, 5, (max_events, 5)))
        sys.stdout.write('\n')

        mcmc = HDPNormalMixture(xs,
                                ncomp=nclust,
                                gpu=device,
                                parallel=True,
                                verbose=2)
        mcmc.sample(burnin, nburn=0, tune_interval=5)

        # Continue from the first sampler with ident=True.
        imcmc = HDPNormalMixture(mcmc, verbose=2)
        imcmc.sample(niter, nburn=0, ident=True)

        # Drop both samplers before the next round allocates new ones.
        del mcmc
        del imcmc