Example #1
 def estimate(self, Y=None, use_sample=False):
     '''
     Estimate the state sequence and a representative waveform.

     @argvs
     Y: observations handed through to qs.estimate (may be None)
     use_sample: if True, draw (mu, R) from the posterior for each
         state; if False, use the posterior mean parameters
     @return
     estim_y: (data_dim, data_len), waveform sequence
     estim_s: (data_len), state sequence with values in [0, n_states)
     vb: float value, variational bound
     '''
     estim_s = self.qs.estimate(Y, self.theta)
     estim_y = zeros((self.data_dim, len(estim_s)))
     for k in range(self.n_states):
         idx = estim_s == k
         n_k = estim_y[:, idx].shape[-1]
         if use_sample:
             # NOTE: a fresh posterior draw of (mu, R) is taken for
             # every state k; only column/slice k of the draw is used.
             mu, R = self.theta.qmur.post.sample()
             m = mu[:, k]
             c = inv(R[:, :, k])
         else:
             m = self.theta.qmur.post.mu[:, k]
             c = inv(self.theta.qmur.post.expt_prec[:, :, k])
         estim_y[:, idx] = mvnrand(m, c, size=n_k).T
     vb = self.calc_vb()
     return estim_y, estim_s, vb
Example #2
 def samples(self, data_len, by_posterior=True):
     '''
     Y, S, prms = hmm.samples(data_len)
     Draw a synthetic observation sequence from the model.
     @argvs
     data_len: data length
     by_posterior: use posterior (True) or prior (False) expectations
     @return
     Y: sampled observations, np.array(data_dim, data_len)
     S: sampled states, np.array(data_len); S[t] is the state at time t
     prms: [mu, R, pi, A]
         mu: np.array(data_dim, n_states)
         R: np.array(data_dim, data_dim, n_states)
         pi: np.array(n_states)
         A: np.array(n_states, n_states)
     '''
     mu, R, pi, A = self.theta.expectations(by_posterior)
     S = self.qs.samples(data_len, pi, A)
     Y = zeros((self.data_dim, data_len))
     # R is constant over time, so invert each state's precision matrix
     # at most once instead of once per timestep (hoists the O(d^3)
     # inversion out of the time loop).
     cov_cache = {}
     for t in range(data_len):
         k = S[t]
         if k not in cov_cache:
             cov_cache[k] = inv(R[:, :, k])
         Y[:, t] = mvnrand(mu[:, k], cov_cache[k])
     return Y, S, [mu, R, pi, A]
Example #3
 def samples(self, data_len, **args):
     '''
     gmm.samples(data_len, by_posterior=True)
     Draw a synthetic observation sequence from the model.
     @argv
     data_len: sample data length, int
     by_posterior: sample using posterior (True) or prior (False)
         expectations, bool, default True
     @return
     Y: sampled observation, np.array(data_dim, data_len)
     S: sampled hidden variables, np.array(data_len)
     [mu, R, pi]: parameters the samples were drawn with
         mu: np.array(data_dim, n_states)
         R: np.array(data_dim, data_dim, n_states)
         pi: np.array(n_states)
     '''
     by_posterior = args.get('by_posterior', True)
     mu, R, pi = self.theta.expectations(by_posterior)
     S = self.qs.samples(data_len, pi)
     # batch-invert all state precisions at once: (d, d, K) -> (d, d, K)
     cov = inv(R.transpose(2, 0, 1)).transpose(1, 2, 0)
     Y = zeros((self.data_dim, data_len))
     for t, st in enumerate(S):
         Y[:, t] = mvnrand(mu[:, st], cov[:, :, st])
     return Y, S, [mu, R, pi]
Example #4
def fgp(xx, kernel):
    """[3] Draw y ~ N(0, K), where K is the kernel (Gram) matrix of xx.

    :param xx: input locations
    :param kernel: kernel function used to build the Gram matrix
    :return: sampled vector y of length len(xx)
    """
    gram = kernel_matrix(xx, kernel)
    # draw one multivariate-normal sample with zero mean and covariance K
    return mvnrand(np.zeros(len(xx)), gram)
Example #5
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1, seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.

        datadir : Path to directory containing data.

        datafn : Prefix name to files that data and missing masks are stored
                 in.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results.  If None
                 (default), then a directory, `name`_results, is made in the
                 current directory.

        nfolds : Number of folds to generate if datafn is None.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        NOTE(review): this function relies on module-level settings assumed
        to be defined elsewhere in this file (maxit, verbose, taus, kappas,
        reuse_msg, grow_buffer, Ls, correct_trans) and on helpers
        (Gaussian, sample_invwishart, ExpSeq, run_exper) -- confirm they
        are in scope before calling.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Validate the data directory before trying to read from it.
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    else:
        if not os.path.isdir(datadir):
            raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")
    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Always fail loudly here.  The old conditional re-raise swallowed
        # the error when the file was simply missing, leaving X undefined
        # and surfacing only as a confusing NameError further down.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    # Missing-data masks; fall back to a single "no mask" fold.
    masks = glob.glob(mpath)
    if len(masks) == 0:
        masks = [None]

    # Initialize parameter possibilities

    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)

    # Vague values that keeps covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))
    N, D = X.shape

    rand_starts = list()
    for r in xrange(nrestarts):
        init_means = np.empty((K,D))
        init_cov = list()
        for k in xrange(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))
        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                       mu_0=mu_0, sigma_0=sigma_0,
                                       kappa_0=kappa_0, nu_0=nu_0)
                              for k in xrange(K)])

        # Random normalized initial-state and transition distributions.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit, 'verbose': verbose}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, reuse_msg,
                                      grow_buffer, Ls, correct_trans)

    def gen_par(par_tuple):
        # Pack one product tuple into a single parameter dict.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['reuseMsg'] = par_tuple[3]
        d['growBuffer'] = par_tuple[4]
        d['metaobs_half'] = par_tuple[5]
        d['correctTrans'] = par_tuple[6]
        # Minibatch size so that a minibatch covers ~100 observations.
        d['mb_sz'] = 100//(2*par_tuple[5]+1)
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    dname = os.path.join(datadir, datafn + "_data.txt")
    exp = ExpSeq(datafn, dname, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()
def fgp(xx, kernel):
    """Sample y ~ N(0, K) from a GP prior at inputs xx.

    K is the Gram matrix built from `kernel` over xx (via kernel_matrix,
    defined elsewhere in this file); returns a vector of length len(xx).
    """
    N = len(xx)
    K = kernel_matrix(xx, kernel)
    return mvnrand(np.zeros(N), K)
def main(name, datadir, datafn, K, expdir=None, nfolds=1, nrestarts=1, seed=None):
    """ Run experiment on 4 state, two group synthetic data.

        name : Name of experiment.

        datadir : Path to directory containing data.

        datafn : Prefix name to files that data and missing masks are stored
                 in.

        K : Number of components in HMM.

        expdir : Path to directory to store experiment results.  If None
                 (default), then a directory, `name`_results, is made in the
                 current directory.

        nfolds : Number of folds to generate if datafn is None.

        nrestarts : Number of random initial parameters.

        seed : Random number seed.

        NOTE(review): this function relies on module-level settings assumed
        to be defined elsewhere in this file (maxit, taus, kappas, Ls) and
        on helpers (Gaussian, sample_invwishart, ExpSeq, run_exper) --
        confirm they are in scope before calling.
    """

    # Set seed for reproducibility
    np.random.seed(seed)

    # Validate the data directory before trying to read from it.
    if not os.path.exists(datadir):
        raise RuntimeError("Could not find datadir: %s" % (datadir,))
    else:
        if not os.path.isdir(datadir):
            raise RuntimeError("datadir: %s exists but is not a directory" % (datadir,))

    if datafn is None:
        datafn = name

    dpath = os.path.join(datadir, datafn + "_data.txt")
    mpath = os.path.join(datadir, datafn + "_fold*.txt")
    try:
        X = np.loadtxt(dpath)
    except IOError:
        # Always fail loudly here.  The old conditional re-raise swallowed
        # the error when the file was simply missing, leaving X undefined
        # and surfacing only as a confusing NameError further down.
        raise RuntimeError("Could not load data: %s" % (dpath,))

    # Missing-data masks; fall back to a single "no mask" fold.
    masks = glob.glob(mpath)
    if len(masks) == 0:
        masks = [None]

    # Initialize parameter possibilities

    obs_mean = np.mean(X, axis=0)
    mu_0 = obs_mean
    sigma_0 = 0.75*np.cov(X.T)

    # Vague values that keeps covariance matrices p.d.
    kappa_0 = 0.01
    nu_0 = 4

    prior_init = np.ones(K)
    prior_tran = np.ones((K,K))
    # BUG FIX: D (observation dimension) was never defined in this
    # function, so np.empty((K,D)) below raised a NameError.
    N, D = X.shape

    rand_starts = list()
    for r in xrange(nrestarts):
        init_means = np.empty((K,D))
        init_cov = list()
        for k in xrange(K):
            init_means[k,:] = mvnrand(mu_0, cov=sigma_0)
            init_cov.append(sample_invwishart(np.linalg.inv(sigma_0), nu_0))
        # We use prior b/c mu and sigma are sampled here
        prior_emit = np.array([Gaussian(mu=init_means[k,:], sigma=sigma_0,
                                       mu_0=mu_0, sigma_0=sigma_0,
                                       kappa_0=kappa_0, nu_0=nu_0)
                              for k in xrange(K)])

        # Random normalized initial-state and transition distributions.
        init_init = np.random.rand(K)
        init_init /= np.sum(init_init)

        init_tran = np.random.rand(K,K)
        init_tran /= np.sum(init_tran, axis=1)[:,np.newaxis]

        # Make dict with initial parameters to pass to experiment.
        pd = {'init_init': init_init, 'init_tran': init_tran,
              'prior_init': prior_init, 'prior_tran': prior_tran,
              'prior_emit': prior_emit, 'maxit': maxit}
        rand_starts.append(pd)

    # Compute Cartesian product of random starts with other possible parameter
    # values, make a generator to fill in entries in the par dicts created
    # above, and then construct the par_list by calling the generator with the
    # Cartesian product iterator.
    par_prod_iter = itertools.product(rand_starts, taus, kappas, Ls)

    def gen_par(par_tuple):
        # Pack one product tuple into a single parameter dict.
        d = copy.copy(par_tuple[0])
        d['tau'] = par_tuple[1]
        d['kappa'] = par_tuple[2]
        d['metaobs_half'] = par_tuple[3]
        return d

    # Call gen_par on each par product to pack into dictionary to pass to
    # experiment.
    par_list = itertools.imap(gen_par, par_prod_iter)

    # Create ExperimentSequential and call run_exper
    dname = os.path.join(datadir, datafn + "_data.txt")
    exp = ExpSeq('exper_synth_4statedd', dname, run_exper, par_list,
                 masks=masks, exper_dir=expdir)
    exp.run()