Example #1
def initialize(deep_map, X,num_pseudo_params):
    smart_map = {}
    for layer,layer_map in deep_map.iteritems():
        smart_map[layer] = {}
        for unit,gp_map in layer_map.iteritems():
            smart_map[layer][unit] = {}
            cov_params = gp_map['cov_params']
            lengthscales = cov_params[1:]
            if layer == 0:
                pairs = itertools.combinations(X, 2)
                dists = np.array([np.abs(p1-p2) for p1,p2 in pairs])
                smart_lengthscales = np.array([np.log(np.median(dists[:,i])) for i in xrange(len(lengthscales))])
                kmeans = KMeans(n_clusters = num_pseudo_params, init = 'k-means++')
                fit = kmeans.fit(X)
                smart_x0 = fit.cluster_centers_
                #inds = npr.choice(len(X), num_pseudo_params, replace = False)
                #smart_x0 = np.array(X)[inds,:]
                smart_y0 = np.ndarray.flatten(smart_x0) 
                #smart_y0 = np.array(y)[inds]
                smart_noise_scale = np.log(np.var(smart_y0))
            else:
                smart_x0 = gp_map['x0']
                smart_y0 = np.ndarray.flatten(smart_x0[:,0])
                smart_lengthscales = np.array([np.log(1) for i in xrange(len(lengthscales))])
                smart_noise_scale = np.log(np.var(smart_y0))
            gp_map['cov_params'] = np.append(cov_params[0],smart_lengthscales)
            gp_map['x0'] = smart_x0
            gp_map['y0'] = smart_y0
            #gp_map['noise_scale'] = smart_noise_scale
            smart_map[layer][unit] = gp_map
    smart_params = pack_deep_params(smart_map)
    return smart_params
Example #2
def fit_maxlike(data, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(data) > np.mean(data), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    p = lambda r: np.sum(data) / np.sum(r+data)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)
Example #3
def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(
        x), "Likelihood-maximizing parameters don't exist!"

    def loglike(r, p): return np.sum(negbin_loglike(r, p, x))
    def p(r): return np.sum(x) / np.sum(r + x)
    def rprime(r): return grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)
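The two fit_maxlike variants above rely on helpers that are not defined in the snippets: negbin_loglike, autograd's grad, and a Newton root-finder. A minimal sketch of how they might be wired together in the same module, assuming autograd and scipy.optimize.newton are available; the negbin_loglike shown here is an illustrative assumption, not necessarily the original repository's definition.

import autograd.numpy as np
import numpy.random as npr
from autograd import grad
from autograd.scipy.special import gammaln
from scipy.optimize import newton

def negbin_loglike(r, p, x):
    # Pointwise log-pmf of the negative binomial, written with autograd's numpy
    # so that grad(loglike) inside fit_maxlike can differentiate it with respect to r.
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) \
        + x * np.log(p) + r * np.log(1 - p)

# Overdispersed count data (variance > mean), so the assert in fit_maxlike holds.
data = npr.negative_binomial(5, 0.5, size=2000)
r_hat, p_hat = fit_maxlike(data, r_guess=1.0)
print(r_hat, p_hat)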
Example #4
    def initialize(self, datas, inputs=None, masks=None, tags=None):
        # Initialize with KMeans
        from sklearn.cluster import KMeans
        data = np.concatenate(datas)
        km = KMeans(self.K).fit(data)
        self.mus = km.cluster_centers_
        sigmas = np.array(
            [np.var(data[km.labels_ == k], axis=0) for k in range(self.K)])
        self.inv_sigmas = np.log(sigmas + 1e-8)
        self.inv_nus = np.log(4) * np.ones(self.K)
Example #5
def whiten_data(X):
  '''
  Returns a copy of the dataset in which every column has zero mean and unit
  variance (constant columns are left at zero). Cross-column correlations are
  not removed, so this standardizes the data rather than fully whitening it.
  '''
  X = X.copy()
  X = X - X.mean(axis=0)[np.newaxis,:]
  stds = np.sqrt(np.var(X, axis=0))[np.newaxis,:]
  stds[np.where(stds == 0)] = 1.0
  X = X / stds
  return X
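A quick check of the behaviour described above, on made-up data, using np.var to confirm each column comes out standardized:

import numpy as np

X = np.column_stack([np.random.randn(100) * 5.0 + 2.0,   # noisy column
                     np.random.randn(100) * 0.1 - 1.0,   # another noisy column
                     np.full(100, 3.0)])                 # constant column
Xw = whiten_data(X)
print(np.allclose(Xw.mean(axis=0), 0.0))  # True: every column is centered
print(np.var(Xw, axis=0))                 # ~[1, 1, 0]; the constant column stays at zero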
Example #6
    def prediction_test(params, X, num_samples, location=0.0, scale=1.0):
        w_mean, w_chol = unpack_var_params(params)
        K = num_samples
        epsilon = rs.randn(num_weights, K)
        R_epsilon = np.dot(w_chol, epsilon)
        samples = R_epsilon.T + w_mean
        outputs = predictions(samples, X) * scale + location
        pred_mean = np.mean(outputs, axis=0)
        pred_var = np.var(outputs, axis=0)
        return pred_mean, pred_var
Example #7
def optimize_hyperparameters(X, Y, inducing, kern, likelihood, messages=True):
  if type(inducing) is np.ndarray and len(inducing.shape) == 2:
    m = GPy.core.SparseGP(X, Y, inducing,
                        kern, #GPy.kern.RBF(input_dim=X.shape[1], lengthscale=sq_length_scales.copy(), variance=kernel_var, ARD=True),
                        likelihood) #GPy.likelihoods.Gaussian(variance=likelihood_var))
  else:
    m = GPy.core.SparseGP(X, Y, X[np.random.randint(X.shape[0], size=inducing), :].copy(),
                          kern, #GPy.kern.RBF(input_dim=X.shape[1], ARD=True),
                          likelihood) #GPy.likelihoods.Gaussian())
  try:
    m[''].constrain_bounded(1e-6, 1e6)
    m.likelihood.variance.constrain_bounded(1e-6, 10*np.var(Y))
    m.kern.variance.constrain_bounded(1e-6, 10*np.var(Y))
    #m.optimize('fmin_tnc', max_iters=10000, messages=True, ipython_notebook=False)
    m.optimize('lbfgsb', max_iters=10000, messages=messages, ipython_notebook=False)
    # adam, lbfgsb, 
  except:
    pass #if constraining/optimization fails (GPy/paramz sometimes fails when constraining variables...) just use whatever the current solution is

  return m.kern, m.likelihood #np.asarray(m.rbf.lengthscale), np.asscalar(m.rbf.variance), np.asscalar(m.likelihood.variance)
Example #8
def get_e_num_large_clusters_from_ez(e_z,
                                    threshold = 0,
                                    n_samples = None,
                                    unif_samples = None):
    """
    Computes the expected number of clusters with at least t
    observations from cluster belongings e_z.
    Parameters
    ----------
    e_z : ndarray
        Array whose (n, k)th entry is the probability of the nth
        datapoint belonging to cluster k
    n_obs : int
        Number of observations in a dataset.
    n_samples : int
        Number of Monte Carlo samples used to compute the expected
        number of clusters.
    unv_norm_samples : ndarray, optional
        The user may pass in a precomputed array of uniform random variables
        on which the reparameterization trick is applied to compute the
        expected number of clusters.
    Returns
    -------
    float
        The expected number of clusters with at least ``threshold`` observations
        in a dataset the same size as e_z
    """

    n_obs = e_z.shape[0]
    n_clusters = e_z.shape[1]

    # draw uniform samples
    if unif_samples is None:
        assert n_samples is not None
        unif_samples = np.random.random((n_obs, n_samples))

    else:
        assert unif_samples is not None
        assert unif_samples.shape[0] == n_obs

    n_samples = unif_samples.shape[1]
    e_z_cumsum = np.cumsum(e_z, axis = 1)

    num_heavy_clusters_vec = np.zeros(n_samples)

    # z_sample is a n_obs x n_samples matrix of cluster belongings
    z_sample = _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples)

    for i in range(n_clusters):
        # get number of clusters with at least enough points above the threshold
        num_heavy_clusters_vec += np.sum(z_sample == i, axis = 0) > threshold

    return np.mean(num_heavy_clusters_vec), np.var(num_heavy_clusters_vec)
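The helper _get_clusters_from_ez_and_unif_samples is not shown in this example. A plausible sketch (an assumption, not the original implementation) that samples cluster assignments by inverting each row's CDF at the uniform draws:

import numpy as np

def _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples):
    # e_z_cumsum: (n_obs, n_clusters) row-wise cumulative probabilities.
    # unif_samples: (n_obs, n_samples) uniform draws.
    # For each observation and each draw, count how many cumulative probabilities
    # lie below the draw; that count is the sampled cluster index, giving an
    # (n_obs, n_samples) matrix of cluster assignments.
    return (unif_samples[:, np.newaxis, :] > e_z_cumsum[:, :, np.newaxis]).sum(axis=1)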
Example #9
    def loss(localAlphaHats):
        lossVal = 0
        #         localAlphaHats = 1 / (1 + np.exp(-1 * localAlphaHats))
        for wi, aH in zip(w, globalAlphaHats):
            tilde = 1 / np.sum(np.multiply(n, wi))
            wiXA = np.multiply(wi, localAlphaHats)
            tilde = tilde * np.sum(np.multiply(wiXA, n))
            lossVal = lossVal + .5 * np.square(aH - tilde)
        lossVal = lossVal + varLambda * np.sum(np.var(localAlphaHats, axis=1))
        lossVal = lossVal + anchorLambda * np.sum(
            np.square(localAlphaHats - a0))
        return lossVal
Example #10
    def get_error_and_ll(params, X, y, num_samples, location=0.0, scale=1.0):
        w_mean, w_std = unpack_var_params(params)
        noise_var_scale = noise_var * scale**2
        K = num_samples
        samples = rs.randn(K, num_weights) * w_std + w_mean
        outputs = predictions(samples, X) * scale + location
        log_factor = -0.5 * np.log(2 * math.pi * noise_var_scale) - 0.5 * (
            y - outputs)**2 / noise_var_scale
        ll = np.mean(logsumexp(log_factor - np.log(K), 0))
        pred_mean = np.mean(outputs, axis=0)
        error = np.sqrt(np.mean((y - pred_mean)**2))
        pred_var = np.var(outputs, axis=0)
        return pred_mean, pred_var, error, ll
Example #11
def test_gamma_method_no_windowing():
    for iteration in range(50):
        obs = pe.Obs(
            [np.random.normal(1.02, 0.02, 733 + np.random.randint(1000))],
            ['ens'])
        obs.gamma_method(S=0)
        assert obs.e_tauint['ens'] == 0.5
        assert np.isclose(
            np.sqrt(np.var(obs.deltas['ens'], ddof=1) / obs.shape['ens']),
            obs.dvalue)
        obs.gamma_method(S=1.1)
        assert obs.e_tauint['ens'] > 0.5
    with pytest.raises(Exception):
        obs.gamma_method(S=-0.2)
Example #12
    def sufficient_shading_variability(self):
        """ Ensure there is sufficient variability in the shading. """
        samples = []
        for idy, row in enumerate(self.patch):
            for idx, pixel in enumerate(row):
                if self.support_matrix[idy][idx]:
                    direction = pixel - self.point
                    samples.append(direction)

        samples = np.array(samples)
        variance = np.var(samples)
        score = np.sqrt(variance) / self.transmission

        return score > thresholds.shading
Example #13
def monte_carlo_se_moving(chains, warmup=0.5, param_idx=0):
    """
    Compute the monte carlo standard error for a variational parameter
    at each iterate using all iterates before that iterate.
    The MCSE is computed using eq (5) of https://arxiv.org/pdf/1903.08008.pdf

    Here, MCSE(\lambda_i)=  sqrt(V(\lambda_i)/Seff)
    where ESS is the effective sample size computed using eq(11).
    MCSE is from 100th to the last iterate using all the chains.

    Parameters
    ----------
    iterate_chains : multi-dimensional array, shape=(n_chains, n_iters, n_var_params)

    warmup : warmup iterates

    param_idx : index of the variational parameter

    Returns
    -------
    mcse_combined_list : array of mcse values for variational parameter with param_idx
    """

    n_chains, N_iters = chains.shape[0], chains.shape[1]

    if warmup < 1:
        warmup = int(warmup * N_iters)

    if warmup > N_iters - 1:
        raise ValueError('Warmup should be less than number of iterates ..')

    if (N_iters - warmup) % 2:
        warmup = int(warmup + 1)

    chains = chains[:, warmup:, param_idx]
    mcse_combined_list = np.zeros(N_iters)
    Neff, _, _, _ = autocorrelation(chains, warmup=0, param_idx=param_idx)

    for i in range(101, N_iters):
        chains_sub = chains[:, :i]
        n_chains, n_iters = chains_sub.shape[0], chains_sub.shape[1]
        chains_flat = np.reshape(chains_sub, (n_chains * i, 1))
        variances_combined = np.var(chains_flat, ddof=1, axis=0)
        Neff, _, _, _ = autocorrelation(chains[:, :i, :],
                                        warmup=0,
                                        param_idx=param_idx)
        mcse_combined = np.sqrt(variances_combined / Neff)
        mcse_combined_list[i] = mcse_combined
    return np.array(mcse_combined_list)
Example #14
def initialize(deep_map, X, num_pseudo_params):
    smart_map = {}
    for layer, layer_map in deep_map.iteritems():
        smart_map[layer] = {}
        for unit, gp_map in layer_map.iteritems():
            smart_map[layer][unit] = {}
            cov_params = gp_map['cov_params']
            lengthscales = cov_params[1:]
            if layer == 0:
                pairs = itertools.combinations(X, 2)
                dists = np.array([np.abs(p1 - p2) for p1, p2 in pairs])
                smart_lengthscales = np.array([
                    np.log(np.median(dists[:, i]))
                    for i in xrange(len(lengthscales))
                ])
                kmeans = KMeans(n_clusters=num_pseudo_params, init='k-means++')
                fit = kmeans.fit(X)
                smart_x0 = fit.cluster_centers_
                #inds = npr.choice(len(X), num_pseudo_params, replace = False)
                #smart_x0 = np.array(X)[inds,:]
                smart_y0 = np.ndarray.flatten(smart_x0)
                #smart_y0 = np.array(y)[inds]
                smart_noise_scale = np.log(np.var(smart_y0))
            else:
                smart_x0 = gp_map['x0']
                smart_y0 = np.ndarray.flatten(smart_x0[:, 0])
                smart_lengthscales = np.array(
                    [np.log(1) for i in xrange(len(lengthscales))])
                smart_noise_scale = np.log(np.var(smart_y0))
            gp_map['cov_params'] = np.append(cov_params[0], smart_lengthscales)
            gp_map['x0'] = smart_x0
            gp_map['y0'] = smart_y0
            #gp_map['noise_scale'] = smart_noise_scale
            smart_map[layer][unit] = gp_map
    smart_params = pack_deep_params(smart_map)
    return smart_params
Example #15
def get_basic_kernel(t, y, yerr):
    kernel = terms.SHOTerm(
        log_S0=np.log(np.var(y)),
        log_Q=-np.log(4.0),
        log_omega0=np.log(2 * np.pi / 10.),
        bounds=dict(
            log_S0=(-20.0, 10.0),
            log_omega0=(np.log(2 * np.pi / 80.0), np.log(2 * np.pi / 2.0)),
        ),
    )
    kernel.freeze_parameter('log_Q')

    # Finally some jitter
    kernel += terms.JitterTerm(log_sigma=np.log(np.median(yerr)),
                               bounds=[(-20.0, 5.0)])

    return kernel
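A hedged usage sketch for the kernel above, assuming the celerite package (and the terms import it implies) is installed; the toy time series is made up for illustration:

import numpy as np
import celerite

t = np.sort(np.random.uniform(0, 80, 200))
yerr = 0.05 * np.ones_like(t)
y = np.sin(2 * np.pi * t / 10.0) + yerr * np.random.randn(len(t))

gp = celerite.GP(get_basic_kernel(t, y, yerr), mean=np.mean(y))
gp.compute(t, yerr)              # factorize the covariance matrix
print(gp.log_likelihood(y))      # log-likelihood of y under the GP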
Example #16
    def total_likelihood(self, data):
        if self.is_fitted:
            total_likelihood = np.zeros([len(self.graph), data.shape[0]])
            total_std_log_Z = np.zeros([len(self.graph), data.shape[0]])
            for e, node in enumerate(self.graph):
                x, y = self.split_cond(data, node)
                log_Z, std_log_Z = self.log_partition(y, x, e)
                cond_likelihood = self.log_pdf(y, x, e) - log_Z
                total_likelihood[e, :] = cond_likelihood
                total_std_log_Z[e, :] = std_log_Z

            likelihood = np.mean(np.sum(total_likelihood, axis=0))
            std_likelihood = np.var(np.sum(
                total_likelihood, axis=0)) + np.mean(
                    np.sum(total_std_log_Z**2, axis=0))
            std_likelihood = np.sqrt(std_likelihood)
            return likelihood, std_likelihood
Example #17
    def smart_initialize_params(init_params):
        layer_params, x0, y0 = unpack_all_params(init_params)
        # Initialize the first length scale parameter as the median distance between points
        pairs = itertools.combinations(X, 2)
        dists = np.array([np.linalg.norm(np.array([p1])- np.array([p2])) for p1,p2 in pairs])
        layer_params[0][2] = np.log(np.var(y))
        layer_params[0][3] = np.log(np.median(dists))

        # Initialize the pseudo inputs for the first layer by sampling from the data, the pseudo outputs equal to the inputs
        x0[0] = np.ndarray.flatten(np.array(X)[rs.choice(len(X), num_pseudo_params, replace=False),:])
        y0[0] = x0[0]
        
        # For every other layer, set the inducing outputs to the inducing inputs (which are sampled from N(0,.01)) and lengthscale large 
        for layer in xrange(1,n_layers):
            y0[layer] = x0[layer]
            layer_params[layer][3] = np.log(1)

        return pack_all_params(layer_params, x0, y0)
Example #18
def get_rotation_kernel(t, y, yerr, period, min_period, max_period):
    kernel = MixtureOfSHOsTerm(
        log_a=np.log(np.var(y)),  ## amplitude of the main peak
        log_Q1=np.log(
            15
        ),  ## decay timescale of the main peak (width of the spike in the FT)
        mix_par=4.,  ## height of second peak relative to first peak
        log_Q2=np.log(15),  ## decay timescale of the second peak
        log_P=np.log(
            period),  ## period (second peak is constrained to twice this)
        bounds=dict(
            log_a=(-20.0, 10.0),
            log_Q1=(0., 10.0),
            mix_par=(-5.0, 10.0),
            log_Q2=(0., 10.0),
            log_P=(None, None),  # np.log(min_period), np.log(max_period)),
        ))
    return kernel
Example #19
    def initialize(self, datas, inputs=None, masks=None, tags=None):
        # Initialize with linear regressions
        from sklearn.linear_model import LinearRegression
        data = np.concatenate(datas) 
        input = np.concatenate(inputs)
        T = data.shape[0]

        for k in range(self.K):
            ts = npr.choice(T-self.lags, replace=False, size=(T-self.lags)//self.K)
            x = np.column_stack([data[ts + l] for l in range(self.lags)] + [input[ts]])
            y = data[ts+self.lags]
            lr = LinearRegression().fit(x, y)
            self.As[k] = lr.coef_[:, :self.D * self.lags]
            self.Vs[k] = lr.coef_[:, self.D * self.lags:]
            self.bs[k] = lr.intercept_
            
            resid = y - lr.predict(x)
            sigmas = np.var(resid, axis=0)
            self.inv_sigmas[k] = np.log(sigmas + 1e-8)
Example #20
def get_rotation_gp(t, y, yerr, period, min_period, max_period):
    kernel = get_basic_kernel(t, y, yerr)
    kernel += MixtureOfSHOsTerm(log_a=np.log(np.var(y)),
                                log_Q1=np.log(15),
                                mix_par=-1.0,
                                log_Q2=np.log(15),
                                log_P=np.log(period),
                                bounds=dict(
                                    log_a=(-20.0, 10.0),
                                    log_Q1=(-0.5 * np.log(2.0), 11.0),
                                    mix_par=(-5.0, 5.0),
                                    log_Q2=(-0.5 * np.log(2.0), 11.0),
                                    log_P=(np.log(min_period),
                                           np.log(max_period)),
                                ))

    gp = celerite.GP(kernel=kernel, mean=0.)
    gp.compute(t)
    return gp
Example #21
def fprop(
    tau,
    prev_taus,
    n_layers,
    n_hid_units,
    is_ResNet,
    batch_norm=True,
):
    n_prev_taus = prev_taus.shape[0]

    prev_hidden = relu(np.dot(X, norm.rvs(size=(1, n_hid_units))))
    h = 0.

    for layer_idx in range(n_layers):
        # if not a resNet and the rest of the scales are 0, break the loop
        if layer_idx > n_prev_taus and not is_ResNet:
            break

        # sample weights
        sigma = 0.
        eps = norm.rvs(size=(n_hid_units, n_hid_units))

        if layer_idx < n_prev_taus: sigma = prev_taus[layer_idx]
        elif layer_idx == n_prev_taus:
            sigma = tau
            if sigma < 0: break
        w_hat = sigma * eps

        # activation
        a = np.dot(prev_hidden, w_hat)

        # batchnorm (no trainable params)
        if batch_norm:
            a = (a - np.mean(a, axis=0)) / np.sqrt(np.var(a, axis=0) + 10)

        if is_ResNet:
            h = h + relu(a)
        else:
            h = relu(a)
        prev_hidden = h

    w_out_hat = norm.rvs(size=(n_hid_units, 1)) * n_hid_units**(-.5)
    return np.dot(prev_hidden, w_out_hat)
Example #22
def MCSE(sample):
    """
    Compute the Monte Carlo standard error (MCSE)

    Parameters
    ----------
    samples : `numpy.ndarray(n_iters, 2*dim)`
        An array containing variational samples

    Returns
    -------
    mcse : `numpy.ndarray(2*dim)`
        MCSE for each variational parameter

    """
    n_iters, d = sample.shape
    sd_dev = np.sqrt(np.var(sample,ddof=1,axis=0))
    eff_samp = [ess(sample[:,i].reshape(1,n_iters)) for i in range(d)]
    mcse = sd_dev/np.sqrt(eff_samp)
    return eff_samp, mcse
Example #23
def get_basic_kernel(t, y, yerr, period=False):
    if not period:
        period = 0.5
    kernel = terms.SHOTerm(
        log_S0=np.log(np.var(y)),
        log_Q=-np.log(4.0),
        log_omega0=np.log(2 * np.pi / 20.),
        bounds=dict(
            log_S0=(-20.0, 10.0),
            log_omega0=(np.log(2 * np.pi / 100.), np.log(2 * np.pi / (10))),
        ),
    )
    kernel.freeze_parameter('log_Q')
    ##  tau = 2*np.exp(-1*np.log(4.0))/np.exp(log_omega0)

    # Finally some jitter
    ls = np.log(np.median(yerr))
    kernel += terms.JitterTerm(log_sigma=ls, bounds=[(ls - 5.0, ls + 5.0)])

    return kernel
Example #24
    def smart_initialize_params(init_params):
        layer_params, x0, y0 = unpack_all_params(init_params)
        # Initialize the first length scale parameter as the median distance between points
        pairs = itertools.combinations(X, 2)
        dists = np.array([
            np.linalg.norm(np.array([p1]) - np.array([p2])) for p1, p2 in pairs
        ])
        layer_params[0][2] = np.log(np.var(y))
        layer_params[0][3] = np.log(np.median(dists))

        # Initialize the pseudo inputs for the first layer by sampling from the data, the pseudo outputs equal to the inputs
        x0[0] = np.ndarray.flatten(
            np.array(X)[
                rs.choice(len(X), num_pseudo_params, replace=False), :])
        y0[0] = x0[0]

        # For every other layer, set the inducing outputs to the inducing inputs (which are sampled from N(0,.01)) and lengthscale large
        for layer in xrange(1, n_layers):
            y0[layer] = x0[layer]
            layer_params[layer][3] = np.log(1)

        return pack_all_params(layer_params, x0, y0)
Example #25
def score_estimator(alpha, m, x, K, alphaz, S=100):
    """
    Form score function estimator based on samples lmbda.
    """
    N = x.shape[0]
    if x.ndim == 1:
        D = 1
    else:
        D = x.shape[1]
    num_z = N * np.sum(K)
    L = K.shape[0]
    gradient = np.zeros((alpha.shape[0], 2))
    f = np.zeros((2 * S, alpha.shape[0], 2))
    h = np.zeros((2 * S, alpha.shape[0], 2))
    for s in range(2 * S):
        lmbda = npr.gamma(alpha, 1.)
        lmbda[lmbda < 1e-300] = 1e-300
        zw = m * lmbda / alpha
        lQ = logQ(zw, alpha, m)
        gradLQ = grad_logQ(zw, alpha, m)

        lP = logp(zw, K, x, alphaz)
        temp = lP - np.sum(lQ)
        f[s, :, :] = temp * gradLQ

        h[s, :, :] = gradLQ

    # CV
    covFH = np.zeros((alpha.shape[0], 2))
    covFH[:, 0] = np.diagonal(
        np.cov(f[S:, :, 0], h[S:, :, 0], rowvar=False)[:alpha.shape[0],
                                                       alpha.shape[0]:])
    covFH[:, 1] = np.diagonal(
        np.cov(f[S:, :, 1], h[S:, :, 1], rowvar=False)[:alpha.shape[0],
                                                       alpha.shape[0]:])
    a = covFH / np.var(h[S:, :, :], axis=0)

    return np.mean(f[:S, :, :], axis=0) - a * np.mean(h[:S, :, :], axis=0)
Example #26
    def callback(combined_params, t, combined_gradient):
        params, est_params = combined_params
        grad_params, grad_est = combined_gradient
        log_temperature, nn_params = est_params
        temperatures.append(np.exp(log_temperature))
        if t % 10 == 0:
            objective_val, grads, est_grads = mc_objective_and_var(
                combined_params, t)
            print("Iteration {} objective {}".format(t,
                                                     np.mean(objective_val)))
            ax1.cla()
            ax1.plot(expit(params), 'r')
            ax1.set_ylabel('parameter values')
            ax1.set_xlabel('parameter index')
            ax1.set_ylim([0, 1])
            ax2.cla()
            ax2.plot(grad_params, 'g')
            ax2.set_ylabel('average gradient')
            ax2.set_xlabel('parameter index')
            ax3.cla()
            ax3.plot(np.var(grads), 'b')
            ax3.set_ylabel('gradient variance')
            ax3.set_xlabel('parameter index')
            ax4.cla()
            ax4.plot(temperatures, 'b')
            ax4.set_ylabel('temperature')
            ax4.set_xlabel('iteration')

            ax5.cla()
            xrange = np.linspace(0, 1, 200)
            f_tilde = lambda x: nn_predict(nn_params, x)
            f_tilde_map = map_and_stack(make_one_d(f_tilde, slice_dim, params))
            ax5.plot(xrange, f_tilde_map(logit(xrange)), 'b')
            ax5.set_ylabel('1d slide of surrogate')
            ax5.set_xlabel('relaxed sample')
            plt.draw()
            plt.pause(1.0 / 30.0)
Example #27
    def normalize0(self, data, axis=0):

        assert (np.isfinite(data).all() == True)

        mean = np.mean(data, axis=axis)
        var = np.var(data, axis=axis)
        stdn = np.std(data, axis=axis)
        minimum_arr = np.amin(data, axis=axis, keepdims=True)
        maximum_arr = np.amax(data, axis=axis, keepdims=True)
        normalize_state = {
            "mean": mean,
            "var": var,
            "min": minimum_arr,
            "max": maximum_arr,
            "stdn": stdn
        }

        if (self.config.NN_ZERO_MEAN_NORMALIZE == True):
            normalized = (data - mean) / (stdn + 0.00001)
        else:
            normalized = (data - minimum_arr) / (maximum_arr - minimum_arr +
                                                 0.0001)

        return normalized.reshape(data.shape), normalize_state
Example #28
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []
                if not self.shared_alpha:
                    for u in range(self.n_unique):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                u]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[u].params[1:])
                        ar_resid.append(ar_mod[u].resid)
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                              mf]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[0].params[1:])
                    ar_resid.append(ar_mod[0].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u] = kmeans[u, 0] - np.dot(
                            np.repeat(kmeans[u, 0], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    if not self.shared_alpha:
                        maxVar = np.max([np.var(ar_resid[i]) for i in
                                        range(self.n_unique)])
                    else:
                        maxVar = np.var(ar_resid[0])
                    precision_init[u] = 1.0 / maxVar
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    alpha_init[u, :] = ar_alpha[ar_idx]
                self.alpha_ = alpha_init
Example #29
def autocorrelation(iterate_chains, warmup=0.5, param_idx=0, lag_max=100):
    """
    Compute the autocorrelation and ESS for a variational parameter using FFT.
    where ESS is the effective sample size computed using eq(10) and (11) of https://arxiv.org/pdf/1903.08008.pdf
    MCSE is from 100th to the last iterate using all the chains.

    Parameters
    ----------
    iterate_chains : multi-dimensional array, shape=(n_chains, n_iters, n_var_params)

    warmup : warmup iterates

    param_idx : index of the variational parameter

    lag_max: lag value

    Returns
    -------
    neff : Effective sample size

    rho_t: autocorrelation at last lag

    autocov: auto covariance using FFT

    a: array of autocorrelation from lag t=0 to lag t=lag_max
    """
    n_iters = iterate_chains.shape[1]
    n_chains = iterate_chains.shape[0]
    if warmup < 1:
        warmup = int(warmup * n_iters)

    if warmup > n_iters - 2:
        raise ValueError('Warmup should be less than number of iterates ..')

    if (n_iters - warmup) % 2:
        warmup = int(warmup + 1)

    chains = iterate_chains[:, warmup:, param_idx]
    means = np.mean(chains, axis=1)
    variances = np.var(chains, ddof=1, axis=1)
    if n_chains == 1:
        var_between = 0
    else:
        var_between = n_iters * np.var(means, ddof=1)

    var_chains = np.mean(variances, axis=0)
    var_pooled = ((n_iters - 1.) * var_chains + var_between) / n_iters
    n_pad = int(2**np.ceil(1. + np.log2(n_iters)))
    freqs = np.fft.rfft(chains - np.expand_dims(means, axis=1), n_pad)
    #print(freqs)
    autocov = np.fft.irfft(np.abs(freqs)**2)[:, :n_iters].real
    autocov = autocov / np.arange(n_iters, 0, -1)
    rho_t = 0
    lag = 1
    a = []
    neff_array = []
    for lag in range(lag_max):
        val = 1. - (var_chains - np.mean(autocov[:, lag])) / var_pooled
        a.append(val)
        if val >= 0:
            rho_t = rho_t + val
        else:
            #break
            rho_t = rho_t

    neff = n_iters * n_chains / (1 + 2 * rho_t)
    return neff, rho_t, autocov, np.asarray(a)
Example #30
beta_0 = 1e-5  # learning rate for the dual update
theta_sum_old = np.mat(np.zeros((n + n * n, 1)))
loss = np.mat(np.zeros((T, 1)))
theta = np.mat(np.ones((n + n * n, 1)))  # primal variable, mu + L
y = np.mat(np.ones((2, 1)))  # dual variable
pair_dist = np.mat(np.zeros((n * n, 1)))
for i in range(1, n):
    for j in range(1, n):
        if i == j:
            continue
        pair_dist[(i - 1) * n + j, :] = np.log(
            np.linalg.norm(training_data[i, :] - training_data[j, :]))

pair_dist_ordering = np.sort(pair_dist, axis=0)
mu_0 = pair_dist_ordering[n * (n + 1) / 2, :]  # hyper-parameter for sampling w
sigma_0 = np.mat(3 * np.var(pair_dist_ordering)
                 )  # hyper-parameter for sampling w, initialized by 3

for t in range(0, T):
    #i = np.random.random_integers(1, high=n, size=1)
    i = t
    print "i=", i
    Knn = np.mat(np.zeros((n, n)))  # kernel matrix
    # sample v, w
    logw = np.random.normal(mu_0, sigma_0, (d + 2, 1))
    w = np.exp(logw)
    # u_0 = w[0, :]
    u_0 = 1
    u = w[
        1:d + 1,
        0]  # pick w's row from 1 to d, the d+1-th row is not picked! [different from MATLAB]
Example #31
def test_scale_data():
    x = np.arange(10).reshape(-1, 1)
    x = np.outer(x, np.ones(5))
    x = scale_data(x, with_mean=True, with_var=True)
    assert (np.all(np.equal(np.var(x, 0), np.ones(x.shape[1]))))
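The scale_data under test is not shown here; a minimal sketch consistent with the assertion's intent (an assumption, the real implementation may differ):

import numpy as np

def scale_data(x, with_mean=True, with_var=True):
    # Center each column and/or rescale it to unit variance.
    x = np.asarray(x, dtype=float).copy()
    if with_mean:
        x = x - x.mean(axis=0)
    if with_var:
        std = x.std(axis=0)
        std[std == 0] = 1.0      # leave constant columns untouched
        x = x / std
    return x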
Example #32
    def batch_normalize(W):
        mu = np.mean(W, axis=0)
        var = np.var(W, axis=0)
        W = (W - mu) / np.sqrt(var + 1)
        return W
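An illustration on made-up data: because the denominator uses var + 1 rather than a small epsilon, columns come out centered but with variance var / (var + 1), slightly below one:

import numpy as np

W = np.random.randn(256, 8) * 10.0
Wn = batch_normalize(W)
print(np.allclose(np.mean(Wn, axis=0), 0.0))  # True: columns are centered
print(np.var(Wn, axis=0))                     # roughly var / (var + 1), just under 1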
Example #33
if __name__ == '__main__':
    filtered_means = []
    filtered_covs = []
    total_thetas = []
    n_iter = 1000

    time_series = np.round(np.power(np.sin(np.arange(10)+1),2)*10 + 10)

    model = StateSpaceModel()
    num_particles = 10
    x0 = np.random.normal(0,10,[num_particles,2]).astype(float)
    
    theta = SVGD().update(x0, 0, x0, time_series, model.grad_overall, n_iter=n_iter, stepsize=0.01)
    total_thetas.append(theta)
    # theta = p(x_0|y_0)

    filtered_means.append(np.mean(theta, axis=0)[0])
    filtered_covs.append(np.var(theta, axis=0)[0])

    for t in range(1, len(time_series)):
        theta = SVGD().update(theta, t, theta, time_series, model.grad_overall, n_iter=n_iter, stepsize=0.01)
        total_thetas.append(theta)
        filtered_means.append(np.mean(theta, axis=0)[0])
        filtered_covs.append(np.var(theta, axis=0)[0])

    return_list = filtered_means + filtered_covs
    myList = ','.join(map(str, np.array(total_thetas).flatten()))
    print(myList)
Example #34
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                       random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    count = 0
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                            u,f]).fit(self.n_lags))
                            ar_alpha.append(ar_mod[count].params[1:])
                            ar_resid.append(ar_mod[count].resid)
                            count += 1
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                    mf,f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[f].params[1:])
                        ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:

                precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / (np.var(
                            X[kmmod.labels_ == u]))

                    else:
                        precision_init[u] = np.linalg.inv\
                        (np.cov(np.transpose(X[kmmod.labels_ == u])))

                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])'''

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)
Example #35
def empirical_l2_reg(images, hdims):
    l2 = init_gmlp(hdims, images.shape[1], 1, scale=0.)
    W_1, b_1 = l2[0]
    W_1[:] = 1. / (0.001 + np.var(images, axis=0)[:,None])
    return flatten(l2)[0]
Example #36
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    count = 0
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                            u,f]).fit(self.n_lags))
                            ar_alpha.append(ar_mod[count].params[1:])
                            ar_resid.append(ar_mod[count].resid)
                            count += 1
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                    mf,f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[f].params[1:])
                        ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u,f] = kmeans[u, f] - np.dot(
                        np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:

                precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0/(np.var(X[kmmod.labels_ == u]))

                    else:
                        precision_init[u] = np.linalg.inv\
                        (np.cov(np.transpose(X[kmmod.labels_ == u])))

                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])'''

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)