# Imports assumed by these snippets (mvnrand is taken to alias
# numpy.random.multivariate_normal, matching the call signatures below).
import numpy as np
from numpy import zeros
from numpy.linalg import inv
from numpy.random import multivariate_normal as mvnrand


def estimate(self, Y=None, use_sample=False):
    '''
    @return
    estim_y: (data_dim, data_len), waveform sequence
    estim_s: (data_len), state sequence which contains 0 to n_states - 1
    vb: float value, variational bound
    '''
    estim_s = self.qs.estimate(Y, self.theta)
    estim_y = zeros((self.data_dim, len(estim_s)))
    if use_sample:
        # draw emission parameters from the posterior for each state
        for k in range(self.n_states):
            idx = estim_s == k
            data_len = estim_y[:, idx].shape[-1]
            mu, R = self.theta.qmur.post.sample()
            estim_y[:, idx] = mvnrand(
                mu[:, k], inv(R[:, :, k]), size=data_len).T
    else:
        # use posterior expectations of the emission parameters
        for k in range(self.n_states):
            idx = estim_s == k
            data_len = estim_y[:, idx].shape[-1]
            m = self.theta.qmur.post.mu[:, k]
            c = inv(self.theta.qmur.post.expt_prec[:, :, k])
            estim_y[:, idx] = mvnrand(m, c, size=data_len).T
    vb = self.calc_vb()
    return estim_y, estim_s, vb
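# Usage sketch for estimate(). The class name `VbHmm` and the `fit()` call
# are hypothetical placeholders for whatever model object exposes this
# method; only estimate()'s signature comes from the code above.
#
#   Y = np.random.randn(2, 500)                      # (data_dim, data_len)
#   model = VbHmm(n_states=3)                        # assumed constructor
#   model.fit(Y)                                     # assumed inference step
#   estim_y, estim_s, vb = model.estimate(Y)         # posterior expectations
#   samp_y, samp_s, _ = model.estimate(Y, use_sample=True)  # posterior draws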
def samples(self, data_len, by_posterior=True):
    '''
    Y, S, [mu, R, pi, A] = hmm.samples(data_len)
    @argvs
    data_len: data length, int
    by_posterior: use posterior expectations if True, prior if False
    @return
    Y: sampled observations, np.array(data_dim, data_len)
    S: sampled states, np.array(data_len)
    prms: [mu, R, pi, A]
        mu: sampled mu, np.array(data_dim, n_states)
        R: sampled R, np.array(data_dim, data_dim, n_states)
        pi: sampled pi, np.array(n_states)
        A: sampled A, np.array(n_states, n_states)
    '''
    # mu, R, pi, A = self.theta.samples(1, by_posterior)
    mu, R, pi, A = self.theta.expectations(by_posterior)
    S = self.qs.samples(data_len, pi, A)
    Y = zeros((self.data_dim, data_len))
    for t in range(data_len):
        k = S[t]
        cov = inv(R[:, :, k])  # covariance from the state's precision
        Y[:, t] = mvnrand(mu[:, k], cov)
    return Y, S, [mu, R, pi, A]
def samples(self, data_len, **args):
    '''
    Y, S, [mu, R, pi] = gmm.samples(data_len, by_posterior=True)
    get sampled data from the model
    @argv
    data_len: sample data length, int
    by_posterior: sample from posterior or prior, bool, default True
    @return
    Y: sampled observation, np.array(data_dim, data_len)
    S: sampled hidden variables, np.array(data_len)
    mu: sampled mu, np.array(data_dim, n_states)
    R: sampled R, np.array(data_dim, data_dim, n_states)
    pi: sampled pi, np.array(n_states)
    '''
    by_posterior = args.get('by_posterior', True)
    # mu, R, pi = self.theta.samples(by_posterior)
    mu, R, pi = self.theta.expectations(by_posterior)
    S = self.qs.samples(data_len, pi)
    Y = zeros((self.data_dim, data_len))
    # invert all state precisions at once: (D, D, K) -> (K, D, D) -> (D, D, K)
    cov = inv(R.transpose(2, 0, 1)).transpose(1, 2, 0)
    for t in range(data_len):
        k = S[t]
        Y[:, t] = mvnrand(mu[:, k], cov[:, :, k])
    return Y, S, [mu, R, pi]
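# Usage sketch shared by the two samplers above; `hmm` and `gmm` are assumed
# fitted model objects, not defined in this excerpt.
#
#   Y, S, (mu, R, pi, A) = hmm.samples(200)                  # Markov states
#   Y, S, (mu, R, pi) = gmm.samples(200, by_posterior=True)  # i.i.d. states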
def fgp(xx, kernel):
    """[3] Use the kernel matrix to draw y (a 1-D vector) from the
    multivariate normal distribution N(mu, Sigma).

    :param xx: input locations, length-N array
    :param kernel: kernel function k(x1, x2)
    :return: sampled function values y, length-N array
    """
    N = len(xx)
    K = kernel_matrix(xx, kernel)
    # draw a multivariate normal random vector with mean 0 and covariance K
    y = mvnrand(np.zeros(N), K)
    return y
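# A minimal sketch of the helper assumed by fgp(); the real kernel_matrix may
# differ. It builds the N x N Gram matrix K[i, j] = kernel(x_i, x_j); the
# diagonal jitter is an extra assumption here to keep K numerically p.s.d.
def kernel_matrix(xx, kernel, jitter=1e-8):
    N = len(xx)
    K = np.array([[kernel(xi, xj) for xj in xx] for xi in xx])
    return K + jitter * np.eye(N)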
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1,
         seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.
        datadir : Path to directory containing data.
        datafn : Prefix name to files that data and missing masks are
                 stored in.
        K : Number of components in HMM.
        expdir : Path to directory to store experiment results. If None
                 (default), then a directory, `name`_results, is made in
                 the current directory.
        nfolds : Number of folds to generate if datafn is None.
        nrestarts : Number of random initial parameters.
        seed : Random number seed.
    """
    # Set seed for reproducibility
    np.random.seed(seed)

    # Generate/Load data and folds (missing masks)
    # These are the emission distributions for the following tests
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    elif not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory"
                           % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")

    try:
        X = np.loadtxt(dpath)
    except IOError:
        if os.path.exists(dpath) and not os.path.isdir(dpath):
            raise RuntimeError("Could not load data: %s" % (dpath,))

    masks = glob.glob(mpath)
    if len(masks) == 0:
        masks = [None]

    # Initialize parameter possibilities
    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)
    # Vague values that keep covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))

    N, D = X.shape

    rand_starts = list()
    for r in xrange(nrestarts):
        init_means = np.empty((K,D))
        init_cov = list()
        for k in xrange(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))

        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                        mu_0=mu_0, sigma_0=sigma_0,
                                        kappa_0=kappa_0, nu_0=nu_0)
                               for k in xrange(K)])
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)
        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit, 'verbose': verbose}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible
    # parameter values, make a generator to fill in entries in the par dicts
    # created above, and then construct the par_list by calling the generator
    # with the Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, reuse_msg,
                                      grow_buffer, Ls, correct_trans)

    def gen_par(par_tuple):
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['reuseMsg'] = par_tuple[3]
        d['growBuffer'] = par_tuple[4]
        d['metaobs_half'] = par_tuple[5]
        d['correctTrans'] = par_tuple[6]
        d['mb_sz'] = 100//(2*par_tuple[5]+1)
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    dname = os.path.join(datadir, datafn + "_data.txt")
    exp = ExpSeq(datafn, dname, run_exper, par_list, masks=masks,
                 exper_dir=expdir)
    exp.run()
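# The names referenced above but not defined in this excerpt (maxit, verbose,
# taus, kappas, reuse_msg, grow_buffer, Ls, correct_trans, plus Gaussian,
# sample_invwishart, ExpSeq, and run_exper) are module-level globals and
# imports in the original script, which targets Python 2 (xrange,
# itertools.imap). Illustrative values only; the actual experiment grids may
# differ:
#
#   maxit = 100                    # VB iterations per restart
#   verbose = False
#   taus = [1.0, 10.0]             # SVI step-size delay
#   kappas = [0.5, 0.7]            # SVI forgetting rate
#   reuse_msg = [True, False]      # reuse forward/backward messages
#   grow_buffer = [True, False]    # grow the meta-observation buffer
#   Ls = [1, 2, 5]                 # meta-observation half-widths
#   correct_trans = [True, False]  # transition-count correction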
def fgp(xx, kernel):
    N = len(xx)
    K = kernel_matrix(xx, kernel)
    return mvnrand(np.zeros(N), K)
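# Usage sketch: one GP sample path with an RBF kernel, assuming mvnrand
# aliases np.random.multivariate_normal (as the call signatures suggest) and
# the kernel_matrix sketch above.
#
#   xx = np.linspace(-4.0, 4.0, 100)
#   rbf = lambda x1, x2: np.exp(-0.5 * (x1 - x2) ** 2)
#   y = fgp(xx, rbf)               # sampled function values at xx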
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1,
         seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.
        datadir : Path to directory containing data.
        datafn : Prefix name to files that data and missing masks are
                 stored in.
        K : Number of components in HMM.
        expdir : Path to directory to store experiment results. If None
                 (default), then a directory, `name`_results, is made in
                 the current directory.
        nfolds : Number of folds to generate if datafn is None.
        nrestarts : Number of random initial parameters.
        seed : Random number seed.
    """
    # Set seed for reproducibility
    np.random.seed(seed)

    # Generate/Load data and folds (missing masks)
    # These are the emission distributions for the following tests
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    elif not os.path.isdir(datadir):
        raise RuntimeError("datadir: %s exists but is not a directory"
                           % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")

    try:
        X = np.loadtxt(dpath)
    except IOError:
        if os.path.exists(dpath) and not os.path.isdir(dpath):
            raise RuntimeError("Could not load data: %s" % (dpath,))

    masks = glob.glob(mpath)
    if len(masks) == 0:
        masks = [None]

    # Initialize parameter possibilities
    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)
    # Vague values that keep covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))

    # Data dimensions; D is needed for the per-state mean initialization.
    N, D = X.shape

    rand_starts = list()
    for r in xrange(nrestarts):
        init_means = np.empty((K,D))
        init_cov = list()
        for k in xrange(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))

        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                        mu_0=mu_0, sigma_0=sigma_0,
                                        kappa_0=kappa_0, nu_0=nu_0)
                               for k in xrange(K)])
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)
        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible
    # parameter values, make a generator to fill in entries in the par dicts
    # created above, and then construct the par_list by calling the generator
    # with the Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, Ls)

    def gen_par(par_tuple):
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['metaobs_half'] = par_tuple[3]
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    dname = os.path.join(datadir, datafn + "_data.txt")
    exp = ExpSeq('exper_synth_4statedd', dname, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()
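# Invocation sketch (hypothetical; the original script's CLI wiring is not
# shown). Assumes <datadir>/<name>_data.txt already exists:
#
#   if __name__ == "__main__":
#       main(name="synth_4statedd", datadir="data", datafn=None, K=4,
#            nrestarts=5, seed=0)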