def initialize(deep_map, X, num_pseudo_params):
    smart_map = {}
    for layer, layer_map in deep_map.iteritems():
        smart_map[layer] = {}
        for unit, gp_map in layer_map.iteritems():
            smart_map[layer][unit] = {}
            cov_params = gp_map['cov_params']
            lengthscales = cov_params[1:]
            if layer == 0:
                pairs = itertools.combinations(X, 2)
                dists = np.array([np.abs(p1 - p2) for p1, p2 in pairs])
                smart_lengthscales = np.array([np.log(np.median(dists[:, i]))
                                               for i in xrange(len(lengthscales))])
                kmeans = KMeans(n_clusters=num_pseudo_params, init='k-means++')
                fit = kmeans.fit(X)
                smart_x0 = fit.cluster_centers_
                #inds = npr.choice(len(X), num_pseudo_params, replace = False)
                #smart_x0 = np.array(X)[inds,:]
                smart_y0 = np.ndarray.flatten(smart_x0)
                #smart_y0 = np.array(y)[inds]
                smart_noise_scale = np.log(np.var(smart_y0))
            else:
                smart_x0 = gp_map['x0']
                smart_y0 = np.ndarray.flatten(smart_x0[:, 0])
                smart_lengthscales = np.array([np.log(1) for i in xrange(len(lengthscales))])
                smart_noise_scale = np.log(np.var(smart_y0))
            gp_map['cov_params'] = np.append(cov_params[0], smart_lengthscales)
            gp_map['x0'] = smart_x0
            gp_map['y0'] = smart_y0
            #gp_map['noise_scale'] = smart_noise_scale
            smart_map[layer][unit] = gp_map
    smart_params = pack_deep_params(smart_map)
    return smart_params

def fit_maxlike(data, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(data) > np.mean(data), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    p = lambda r: np.sum(data) / np.sum(r + data)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)

def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(x), "Likelihood-maximizing parameters don't exist!"

    def loglike(r, p):
        return np.sum(negbin_loglike(r, p, x))

    def p(r):
        return np.sum(x) / np.sum(r + x)

    def rprime(r):
        return grad(loglike)(r, p(r))

    r = newton(rprime, r_guess)
    return r, p(r)

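# The two fit_maxlike snippets above assume a negbin_loglike helper plus `np`, `grad`
# and `newton` in scope (autograd-style numpy, autograd.grad, scipy.optimize.newton).
# A minimal sketch of that helper, using the standard negative binomial log pmf in the
# same (r, p) parameterization implied by p(r) = sum(x) / sum(r + x) -- an assumption,
# not necessarily the original implementation:
from autograd.scipy.special import gammaln


def negbin_loglike(r, p, x):
    # log pmf of a negative binomial with r failures and success probability p
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) \
        + x * np.log(p) + r * np.log(1 - p)


# Hypothetical usage: fit to overdispersed synthetic counts, e.g.
#   data = np.random.poisson(np.random.gamma(shape=2., scale=5., size=5000))
#   r, p = fit_maxlike(data, r_guess=1.)
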
def initialize(self, datas, inputs=None, masks=None, tags=None):
    # Initialize with KMeans
    from sklearn.cluster import KMeans
    data = np.concatenate(datas)
    km = KMeans(self.K).fit(data)
    self.mus = km.cluster_centers_
    sigmas = np.array([np.var(data[km.labels_ == k], axis=0)
                       for k in range(self.K)])
    self.inv_sigmas = np.log(sigmas + 1e-8)
    self.inv_nus = np.log(4) * np.ones(self.K)

def whiten_data(X):
    ''' Returns a copy of the dataset with zero mean and unit variance in each
        dimension (per-feature standardization; it does not decorrelate features) '''
    X = X.copy()
    X = X - X.mean(axis=0)[np.newaxis, :]
    stds = np.sqrt(np.var(X, axis=0))[np.newaxis, :]
    stds[np.where(stds == 0)] = 1.0
    X = X / stds
    return X

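# Quick usage sketch (illustrative only): after standardization, each column has
# mean ~0 and standard deviation ~1; constant columns are left unchanged.
X_demo = np.random.randn(200, 3) * np.array([2.0, 5.0, 0.1]) + np.array([1.0, -3.0, 0.5])
X_whitened = whiten_data(X_demo)
print(X_whitened.mean(axis=0))  # ~[0, 0, 0]
print(X_whitened.std(axis=0))   # ~[1, 1, 1]
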
def prediction_test(params, X, num_samples, location=0.0, scale=1.0):
    w_mean, w_chol = unpack_var_params(params)
    K = num_samples
    epsilon = rs.randn(num_weights, K)
    R_epsilon = np.dot(w_chol, epsilon)
    samples = R_epsilon.T + w_mean
    outputs = predictions(samples, X) * scale + location
    pred_mean = np.mean(outputs, axis=0)
    pred_var = np.var(outputs, axis=0)
    return pred_mean, pred_var

def optimize_hyperparameters(X, Y, inducing, kern, likelihood, messages=True):
    if type(inducing) is np.ndarray and len(inducing.shape) == 2:
        m = GPy.core.SparseGP(X, Y, inducing,
                              kern,  #GPy.kern.RBF(input_dim=X.shape[1], lengthscale=sq_length_scales.copy(), variance=kernel_var, ARD=True),
                              likelihood)  #GPy.likelihoods.Gaussian(variance=likelihood_var))
    else:
        m = GPy.core.SparseGP(X, Y,
                              X[np.random.randint(X.shape[0], size=inducing), :].copy(),
                              kern,  #GPy.kern.RBF(input_dim=X.shape[1], ARD=True),
                              likelihood)  #GPy.likelihoods.Gaussian())
    try:
        m[''].constrain_bounded(1e-6, 1e6)
        m.likelihood.variance.constrain_bounded(1e-6, 10 * np.var(Y))
        m.kern.variance.constrain_bounded(1e-6, 10 * np.var(Y))
        #m.optimize('fmin_tnc', max_iters=10000, messages=True, ipython_notebook=False)
        m.optimize('lbfgsb', max_iters=10000, messages=messages,
                   ipython_notebook=False)  # adam, lbfgsb,
    except:
        # if constraining/optimization fails (GPy/paramz sometimes fails when
        # constraining variables...) just use whatever the current solution is
        pass
    return m.kern, m.likelihood  #np.asarray(m.rbf.lengthscale), np.asscalar(m.rbf.variance), np.asscalar(m.likelihood.variance)

def get_e_num_large_clusters_from_ez(e_z,
                                     threshold=0,
                                     n_samples=None,
                                     unif_samples=None):
    """
    Computes the expected number of clusters with at least ``threshold``
    observations from cluster belongings e_z.

    Parameters
    ----------
    e_z : ndarray
        Array whose (n, k)th entry is the probability of the nth datapoint
        belonging to cluster k.
    threshold : int
        Minimum number of observations a cluster must contain to be counted.
    n_samples : int
        Number of Monte Carlo samples used to compute the expected number
        of clusters.
    unif_samples : ndarray, optional
        The user may pass in a precomputed array of uniform random variables
        on which the reparameterization trick is applied to compute the
        expected number of clusters.

    Returns
    -------
    tuple of floats
        The Monte Carlo mean and variance of the number of clusters with at
        least ``threshold`` observations in a dataset the same size as e_z.
    """
    n_obs = e_z.shape[0]
    n_clusters = e_z.shape[1]

    # draw uniform samples
    if unif_samples is None:
        assert n_samples is not None
        unif_samples = np.random.random((n_obs, n_samples))
    else:
        assert unif_samples is not None
        assert unif_samples.shape[0] == n_obs
        n_samples = unif_samples.shape[1]

    e_z_cumsum = np.cumsum(e_z, axis=1)

    num_heavy_clusters_vec = np.zeros(n_samples)

    # z_sample is a n_obs x n_samples matrix of cluster belongings
    z_sample = _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples)

    for i in range(n_clusters):
        # get number of clusters with at least enough points above the threshold
        num_heavy_clusters_vec += np.sum(z_sample == i, axis=0) > threshold

    return np.mean(num_heavy_clusters_vec), np.var(num_heavy_clusters_vec)

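# The helper _get_clusters_from_ez_and_unif_samples is not shown above. A minimal
# sketch consistent with how it is called (inverse-CDF sampling of cluster labels;
# an assumption, not the original implementation):
def _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples):
    # e_z_cumsum: (n_obs, n_clusters) row-wise CDFs; unif_samples: (n_obs, n_samples).
    # For each observation and sample, the sampled label is the number of CDF
    # entries strictly below the uniform draw.
    return np.sum(e_z_cumsum[:, :, None] < unif_samples[:, None, :], axis=1)
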
def loss(localAlphaHats):
    lossVal = 0
    # localAlphaHats = 1 / (1 + np.exp(-1 * localAlphaHats))
    for wi, aH in zip(w, globalAlphaHats):
        tilde = 1 / np.sum(np.multiply(n, wi))
        wiXA = np.multiply(wi, localAlphaHats)
        tilde = tilde * np.sum(np.multiply(wiXA, n))
        lossVal = lossVal + .5 * np.square(aH - tilde)
    lossVal = lossVal + varLambda * np.sum(np.var(localAlphaHats, axis=1))
    lossVal = lossVal + anchorLambda * np.sum(np.square(localAlphaHats - a0))
    return lossVal

def get_error_and_ll(params, X, y, num_samples, location=0.0, scale=1.0):
    w_mean, w_std = unpack_var_params(params)
    noise_var_scale = noise_var * scale**2
    K = num_samples
    samples = rs.randn(K, num_weights) * w_std + w_mean
    outputs = predictions(samples, X) * scale + location
    log_factor = -0.5 * np.log(2 * math.pi * noise_var_scale) \
        - 0.5 * (y - outputs)**2 / noise_var_scale
    ll = np.mean(logsumexp(log_factor - np.log(K), 0))
    pred_mean = np.mean(outputs, axis=0)
    error = np.sqrt(np.mean((y - pred_mean)**2))
    pred_var = np.var(outputs, axis=0)
    return pred_mean, pred_var, error, ll

def test_gamma_method_no_windowing():
    for iteration in range(50):
        obs = pe.Obs([np.random.normal(1.02, 0.02, 733 + np.random.randint(1000))],
                     ['ens'])
        obs.gamma_method(S=0)
        assert obs.e_tauint['ens'] == 0.5
        assert np.isclose(np.sqrt(np.var(obs.deltas['ens'], ddof=1) / obs.shape['ens']),
                          obs.dvalue)
        obs.gamma_method(S=1.1)
        assert obs.e_tauint['ens'] > 0.5
    with pytest.raises(Exception):
        obs.gamma_method(S=-0.2)

def sufficient_shading_variability(self):
    """ Ensure there is sufficient variability in the shading. """
    samples = []
    for idy, row in enumerate(self.patch):
        for idx, pixel in enumerate(row):
            if self.support_matrix[idy][idx]:
                direction = pixel - self.point
                samples.append(direction)
    samples = np.array(samples)
    variance = np.var(samples)
    score = np.sqrt(variance) / self.transmission
    return score > thresholds.shading

def monte_carlo_se_moving(chains, warmup=0.5, param_idx=0):
    """
    Compute the Monte Carlo standard error (MCSE) for a variational parameter at each
    iterate, using all iterates before that iterate. The MCSE is computed using eq (5)
    of https://arxiv.org/pdf/1903.08008.pdf: MCSE(lambda_i) = sqrt(V(lambda_i) / Seff),
    where Seff is the effective sample size computed using eq (11). The MCSE is computed
    from the 100th to the last iterate using all the chains.

    Parameters
    ----------
    chains : multi-dimensional array, shape=(n_chains, n_iters, n_var_params)
    warmup : warmup iterates
    param_idx : index of the variational parameter

    Returns
    -------
    mcse_combined_list : array of MCSE values for the variational parameter with param_idx
    """
    n_chains, N_iters = chains.shape[0], chains.shape[1]
    if warmup < 1:
        warmup = int(warmup * N_iters)
    if warmup > N_iters - 1:
        raise ValueError('Warmup should be less than number of iterates ..')
    if (N_iters - warmup) % 2:
        warmup = int(warmup + 1)

    # chains is reduced to a single parameter here, so it becomes 2-D:
    # (n_chains, N_iters - warmup)
    chains = chains[:, warmup:, param_idx]
    mcse_combined_list = np.zeros(N_iters)
    # autocorrelation expects a 3-D array, so add back a trailing parameter axis
    # and index parameter 0 (the single remaining parameter).
    Neff, _, _, _ = autocorrelation(chains[:, :, np.newaxis], warmup=0, param_idx=0)
    for i in range(101, N_iters):
        chains_sub = chains[:, :i]
        n_chains, n_iters = chains_sub.shape[0], chains_sub.shape[1]
        chains_flat = np.reshape(chains_sub, (n_chains * i, 1))
        variances_combined = np.var(chains_flat, ddof=1, axis=0)
        Neff, _, _, _ = autocorrelation(chains[:, :i, np.newaxis], warmup=0, param_idx=0)
        mcse_combined = np.sqrt(variances_combined / Neff)
        mcse_combined_list[i] = mcse_combined
    return np.array(mcse_combined_list)

def get_basic_kernel(t, y, yerr):
    kernel = terms.SHOTerm(
        log_S0=np.log(np.var(y)),
        log_Q=-np.log(4.0),
        log_omega0=np.log(2 * np.pi / 10.),
        bounds=dict(
            log_S0=(-20.0, 10.0),
            log_omega0=(np.log(2 * np.pi / 80.0), np.log(2 * np.pi / 2.0)),
        ),
    )
    kernel.freeze_parameter('log_Q')

    # Finally some jitter
    kernel += terms.JitterTerm(log_sigma=np.log(np.median(yerr)),
                               bounds=[(-20.0, 5.0)])
    return kernel

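# Minimal usage sketch for the kernel above (illustrative only), assuming `terms`
# is celerite.terms and that t, y, yerr are 1-D arrays of observation times,
# values, and uncertainties already defined elsewhere.
import celerite

gp = celerite.GP(get_basic_kernel(t, y, yerr), mean=np.mean(y))
gp.compute(t, yerr)           # factorize the covariance matrix at the observed times
print(gp.log_likelihood(y))   # log marginal likelihood under the current parameters
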
def total_likelihood(self, data):
    if self.is_fitted:
        total_likelihood = np.zeros([len(self.graph), data.shape[0]])
        total_std_log_Z = np.zeros([len(self.graph), data.shape[0]])
        for e, node in enumerate(self.graph):
            x, y = self.split_cond(data, node)
            log_Z, std_log_Z = self.log_partition(y, x, e)
            cond_likelihood = self.log_pdf(y, x, e) - log_Z
            total_likelihood[e, :] = cond_likelihood
            total_std_log_Z[e, :] = std_log_Z
        likelihood = np.mean(np.sum(total_likelihood, axis=0))
        std_likelihood = np.var(np.sum(total_likelihood, axis=0)) \
            + np.mean(np.sum(total_std_log_Z**2, axis=0))
        std_likelihood = np.sqrt(std_likelihood)
        return likelihood, std_likelihood

def smart_initialize_params(init_params):
    layer_params, x0, y0 = unpack_all_params(init_params)
    # Initialize the first length scale parameter as the median distance between points
    pairs = itertools.combinations(X, 2)
    dists = np.array([np.linalg.norm(np.array([p1]) - np.array([p2]))
                      for p1, p2 in pairs])
    layer_params[0][2] = np.log(np.var(y))
    layer_params[0][3] = np.log(np.median(dists))

    # Initialize the pseudo inputs for the first layer by sampling from the data,
    # the pseudo outputs equal to the inputs
    x0[0] = np.ndarray.flatten(
        np.array(X)[rs.choice(len(X), num_pseudo_params, replace=False), :])
    y0[0] = x0[0]

    # For every other layer, set the inducing outputs to the inducing inputs
    # (which are sampled from N(0,.01)) and lengthscale large
    for layer in xrange(1, n_layers):
        y0[layer] = x0[layer]
        layer_params[layer][3] = np.log(1)

    return pack_all_params(layer_params, x0, y0)

def get_rotation_kernel(t, y, yerr, period, min_period, max_period):
    kernel = MixtureOfSHOsTerm(
        log_a=np.log(np.var(y)),  ## amplitude of the main peak
        log_Q1=np.log(15),        ## decay timescale of the main peak (width of the spike in the FT)
        mix_par=4.,               ## height of second peak relative to first peak
        log_Q2=np.log(15),        ## decay timescale of the second peak
        log_P=np.log(period),     ## period (second peak is constrained to twice this)
        bounds=dict(
            log_a=(-20.0, 10.0),
            log_Q1=(0., 10.0),
            mix_par=(-5.0, 10.0),
            log_Q2=(0., 10.0),
            log_P=(None, None),  # np.log(min_period), np.log(max_period)),
        ))
    return kernel

def initialize(self, datas, inputs=None, masks=None, tags=None):
    # Initialize with linear regressions
    from sklearn.linear_model import LinearRegression
    data = np.concatenate(datas)
    input = np.concatenate(inputs)
    T = data.shape[0]

    for k in range(self.K):
        ts = npr.choice(T - self.lags, replace=False, size=(T - self.lags) // self.K)
        x = np.column_stack([data[ts + l] for l in range(self.lags)] + [input[ts]])
        y = data[ts + self.lags]
        lr = LinearRegression().fit(x, y)
        self.As[k] = lr.coef_[:, :self.D * self.lags]
        self.Vs[k] = lr.coef_[:, self.D * self.lags:]
        self.bs[k] = lr.intercept_

        resid = y - lr.predict(x)
        sigmas = np.var(resid, axis=0)
        self.inv_sigmas[k] = np.log(sigmas + 1e-8)

def get_rotation_gp(t, y, yerr, period, min_period, max_period):
    kernel = get_basic_kernel(t, y, yerr)
    kernel += MixtureOfSHOsTerm(
        log_a=np.log(np.var(y)),
        log_Q1=np.log(15),
        mix_par=-1.0,
        log_Q2=np.log(15),
        log_P=np.log(period),
        bounds=dict(
            log_a=(-20.0, 10.0),
            log_Q1=(-0.5 * np.log(2.0), 11.0),
            mix_par=(-5.0, 5.0),
            log_Q2=(-0.5 * np.log(2.0), 11.0),
            log_P=(np.log(min_period), np.log(max_period)),
        ))
    gp = celerite.GP(kernel=kernel, mean=0.)
    gp.compute(t)
    return gp

def fprop(
    tau,
    prev_taus,
    n_layers,
    n_hid_units,
    is_ResNet,
    batch_norm=True,
):
    n_prev_taus = prev_taus.shape[0]
    prev_hidden = relu(np.dot(X, norm.rvs(size=(1, n_hid_units))))
    h = 0.

    for layer_idx in range(n_layers):
        # if not a ResNet and the rest of the scales are 0, break the loop
        if layer_idx > n_prev_taus and not is_ResNet:
            break

        # sample weights
        sigma = 0.
        eps = norm.rvs(size=(n_hid_units, n_hid_units))
        if layer_idx < n_prev_taus:
            sigma = prev_taus[layer_idx]
        elif layer_idx == n_prev_taus:
            sigma = tau
        if sigma < 0:
            break
        w_hat = sigma * eps

        # activation
        a = np.dot(prev_hidden, w_hat)

        # batchnorm (no trainable params)
        if batch_norm:
            a = (a - np.mean(a, axis=0)) / np.sqrt(np.var(a, axis=0) + 10)

        if is_ResNet:
            h = h + relu(a)
        else:
            h = relu(a)
        prev_hidden = h

    w_out_hat = norm.rvs(size=(n_hid_units, 1)) * n_hid_units**(-.5)
    return np.dot(prev_hidden, w_out_hat)

def MCSE(sample):
    """
    Compute the Monte Carlo standard error (MCSE).

    Parameters
    ----------
    sample : `numpy.ndarray(n_iters, 2*dim)`
        An array containing variational samples

    Returns
    -------
    eff_samp : list
        Effective sample size for each variational parameter
    mcse : `numpy.ndarray(2*dim)`
        MCSE for each variational parameter
    """
    n_iters, d = sample.shape
    sd_dev = np.sqrt(np.var(sample, ddof=1, axis=0))
    eff_samp = [ess(sample[:, i].reshape(1, n_iters)) for i in range(d)]
    mcse = sd_dev / np.sqrt(eff_samp)
    return eff_samp, mcse

def get_basic_kernel(t, y, yerr, period=False):
    if not period:
        period = 0.5
    kernel = terms.SHOTerm(
        log_S0=np.log(np.var(y)),
        log_Q=-np.log(4.0),
        log_omega0=np.log(2 * np.pi / 20.),
        bounds=dict(
            log_S0=(-20.0, 10.0),
            log_omega0=(np.log(2 * np.pi / 100.), np.log(2 * np.pi / (10))),
        ),
    )
    kernel.freeze_parameter('log_Q')
    ## tau = 2*np.exp(-1*np.log(4.0))/np.exp(log_omega0)

    # Finally some jitter
    ls = np.log(np.median(yerr))
    kernel += terms.JitterTerm(log_sigma=ls, bounds=[(ls - 5.0, ls + 5.0)])
    return kernel

def score_estimator(alpha, m, x, K, alphaz, S=100):
    """
    Form score function estimator based on samples lmbda.
    """
    N = x.shape[0]
    if x.ndim == 1:
        D = 1
    else:
        D = x.shape[1]
    num_z = N * np.sum(K)
    L = K.shape[0]

    gradient = np.zeros((alpha.shape[0], 2))
    f = np.zeros((2 * S, alpha.shape[0], 2))
    h = np.zeros((2 * S, alpha.shape[0], 2))
    for s in range(2 * S):
        lmbda = npr.gamma(alpha, 1.)
        lmbda[lmbda < 1e-300] = 1e-300
        zw = m * lmbda / alpha
        lQ = logQ(zw, alpha, m)
        gradLQ = grad_logQ(zw, alpha, m)
        lP = logp(zw, K, x, alphaz)
        temp = lP - np.sum(lQ)
        f[s, :, :] = temp * gradLQ
        h[s, :, :] = gradLQ

    # CV
    covFH = np.zeros((alpha.shape[0], 2))
    covFH[:, 0] = np.diagonal(
        np.cov(f[S:, :, 0], h[S:, :, 0], rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    covFH[:, 1] = np.diagonal(
        np.cov(f[S:, :, 1], h[S:, :, 1], rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    a = covFH / np.var(h[S:, :, :], axis=0)

    return np.mean(f[:S, :, :], axis=0) - a * np.mean(h[:S, :, :], axis=0)

def callback(combined_params, t, combined_gradient):
    params, est_params = combined_params
    grad_params, grad_est = combined_gradient
    log_temperature, nn_params = est_params
    temperatures.append(np.exp(log_temperature))
    if t % 10 == 0:
        objective_val, grads, est_grads = mc_objective_and_var(combined_params, t)
        print("Iteration {} objective {}".format(t, np.mean(objective_val)))

        ax1.cla()
        ax1.plot(expit(params), 'r')
        ax1.set_ylabel('parameter values')
        ax1.set_xlabel('parameter index')
        ax1.set_ylim([0, 1])

        ax2.cla()
        ax2.plot(grad_params, 'g')
        ax2.set_ylabel('average gradient')
        ax2.set_xlabel('parameter index')

        ax3.cla()
        ax3.plot(np.var(grads), 'b')
        ax3.set_ylabel('gradient variance')
        ax3.set_xlabel('parameter index')

        ax4.cla()
        ax4.plot(temperatures, 'b')
        ax4.set_ylabel('temperature')
        ax4.set_xlabel('iteration')

        ax5.cla()
        xrange = np.linspace(0, 1, 200)
        f_tilde = lambda x: nn_predict(nn_params, x)
        f_tilde_map = map_and_stack(make_one_d(f_tilde, slice_dim, params))
        ax5.plot(xrange, f_tilde_map(logit(xrange)), 'b')
        ax5.set_ylabel('1d slice of surrogate')
        ax5.set_xlabel('relaxed sample')

        plt.draw()
        plt.pause(1.0 / 30.0)

def normalize0(self, data, axis=0):
    assert (np.isfinite(data).all() == True)
    mean = np.mean(data, axis=axis)
    var = np.var(data, axis=axis)
    stdn = np.std(data, axis=axis)
    minimum_arr = np.amin(data, axis=axis, keepdims=True)
    maximum_arr = np.amax(data, axis=axis, keepdims=True)
    normalize_state = {
        "mean": mean,
        "var": var,
        "min": minimum_arr,
        "max": maximum_arr,
        "stdn": stdn
    }
    if (self.config.NN_ZERO_MEAN_NORMALIZE == True):
        normalized = (data - mean) / (stdn + 0.00001)
    else:
        normalized = (data - minimum_arr) / (maximum_arr - minimum_arr + 0.0001)
    return normalized.reshape(data.shape), normalize_state

def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                for u in range(self.n_unique):
                    ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == u]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[u].params[1:])
                    ar_resid.append(ar_mod[u].resid)
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == mf]).fit(self.n_lags))
                ar_alpha.append(ar_mod[0].params[1:])
                ar_resid.append(ar_mod[0].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u] = kmeans[u, 0] - np.dot(
                        np.repeat(kmeans[u, 0], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    if not self.shared_alpha:
                        maxVar = np.max([np.var(ar_resid[i])
                                         for i in range(self.n_unique)])
                    else:
                        maxVar = np.var(ar_resid[0])
                    precision_init[u] = 1.0 / maxVar
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    alpha_init[u, :] = ar_alpha[ar_idx]
                self.alpha_ = alpha_init

def autocorrelation(iterate_chains, warmup=0.5, param_idx=0, lag_max=100):
    """
    Compute the autocorrelation and ESS for a variational parameter using FFT,
    where ESS is the effective sample size computed using eq (10) and (11) of
    https://arxiv.org/pdf/1903.08008.pdf. MCSE is from the 100th to the last
    iterate using all the chains.

    Parameters
    ----------
    iterate_chains : multi-dimensional array, shape=(n_chains, n_iters, n_var_params)
    warmup : warmup iterates
    param_idx : index of the variational parameter
    lag_max : maximum lag value

    Returns
    -------
    neff : effective sample size
    rho_t : autocorrelation at the last lag
    autocov : autocovariance computed using FFT
    a : array of autocorrelations from lag t=0 to lag t=lag_max
    """
    n_iters = iterate_chains.shape[1]
    n_chains = iterate_chains.shape[0]
    if warmup < 1:
        warmup = int(warmup * n_iters)
    if warmup > n_iters - 2:
        raise ValueError('Warmup should be less than number of iterates ..')
    if (n_iters - warmup) % 2:
        warmup = int(warmup + 1)

    chains = iterate_chains[:, warmup:, param_idx]
    means = np.mean(chains, axis=1)
    variances = np.var(chains, ddof=1, axis=1)
    if n_chains == 1:
        var_between = 0
    else:
        var_between = n_iters * np.var(means, ddof=1)
    var_chains = np.mean(variances, axis=0)
    var_pooled = ((n_iters - 1.) * var_chains + var_between) / n_iters

    n_pad = int(2**np.ceil(1. + np.log2(n_iters)))
    freqs = np.fft.rfft(chains - np.expand_dims(means, axis=1), n_pad)
    #print(freqs)
    autocov = np.fft.irfft(np.abs(freqs)**2)[:, :n_iters].real
    autocov = autocov / np.arange(n_iters, 0, -1)

    rho_t = 0
    lag = 1
    a = []
    neff_array = []
    for lag in range(lag_max):
        val = 1. - (var_chains - np.mean(autocov[:, lag])) / var_pooled
        a.append(val)
        if val >= 0:
            rho_t = rho_t + val
        else:
            #break
            rho_t = rho_t

    neff = n_iters * n_chains / (1 + 2 * rho_t)
    return neff, rho_t, autocov, np.asarray(a)

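# Sanity-check sketch (illustrative only, not part of the original code): the
# zero-padded FFT trick used above reproduces the directly computed
# autocovariance sums for a single demeaned chain.
x = np.random.randn(256)
x = x - x.mean()
n = len(x)
n_pad = int(2**np.ceil(1. + np.log2(n)))
freqs = np.fft.rfft(x, n_pad)
acov_fft = np.fft.irfft(np.abs(freqs)**2)[:n] / np.arange(n, 0, -1)
acov_direct = np.array([np.sum(x[:n - k] * x[k:]) / (n - k) for k in range(n)])
assert np.allclose(acov_fft, acov_direct)
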
beta_0 = 1e-5  # learning rate for the dual update
theta_sum_old = np.mat(np.zeros((n + n * n, 1)))
loss = np.mat(np.zeros((T, 1)))
theta = np.mat(np.ones((n + n * n, 1)))  # primal variable, mu + L
y = np.mat(np.ones((2, 1)))  # dual variable
pair_dist = np.mat(np.zeros((n * n, 1)))
for i in range(1, n):
    for j in range(1, n):
        if i == j:
            continue
        pair_dist[(i - 1) * n + j, :] = np.log(
            np.linalg.norm(training_data[i, :] - training_data[j, :]))
pair_dist_ordering = np.sort(pair_dist, axis=0)
mu_0 = pair_dist_ordering[n * (n + 1) / 2, :]  # hyper-parameter for sampling w
sigma_0 = np.mat(3 * np.var(pair_dist_ordering))  # hyper-parameter for sampling w, initialized by 3

for t in range(0, T):
    #i = np.random.random_integers(1, high=n, size=1)
    i = t
    print "i=", i
    Knn = np.mat(np.zeros((n, n)))  # kernel matrix
    # sample v, w
    logw = np.random.normal(mu_0, sigma_0, (d + 2, 1))
    w = np.exp(logw)
    # u_0 = w[0, :]
    u_0 = 1
    u = w[1:d + 1, 0]  # pick w's row from 1 to d, the d+1-th row is not picked! [different from MATLAB]

def test_scale_data():
    x = np.arange(10).reshape(-1, 1)
    x = np.outer(x, np.ones(5))
    x = scale_data(x, with_mean=True, with_var=True)
    assert (np.all(np.equal(np.var(x, 0), np.ones(x.shape[1]))))

def batch_normalize(W):
    mu = np.mean(W, axis=0)
    var = np.var(W, axis=0)
    W = (W - mu) / np.sqrt(var + 1)
    return W

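# Quick sanity check (illustrative only): columns of the normalized matrix have
# zero mean, while the variance stays a bit below 1 because of the +1 term added
# inside the square root above.
W_demo = np.random.randn(100, 3) * 5 + 2
W_norm = batch_normalize(W_demo)
print(np.mean(W_norm, axis=0))  # ~[0, 0, 0]
print(np.var(W_norm, axis=0))   # < 1 due to the +1 term
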
if __name__ == '__main__':
    filtered_means = []
    filtered_covs = []
    total_thetas = []
    n_iter = 1000
    time_series = np.round(np.power(np.sin(np.arange(10) + 1), 2) * 10 + 10)
    model = StateSpaceModel()
    num_particles = 10
    x0 = np.random.normal(0, 10, [num_particles, 2]).astype(float)
    theta = SVGD().update(x0, 0, x0, time_series, model.grad_overall,
                          n_iter=n_iter, stepsize=0.01)
    total_thetas.append(theta)
    # theta = p(x_0|y_0)
    filtered_means.append(np.mean(theta, axis=0)[0])
    filtered_covs.append(np.var(theta, axis=0)[0])

    for t in range(1, len(time_series)):
        theta = SVGD().update(theta, t, theta, time_series, model.grad_overall,
                              n_iter=n_iter, stepsize=0.01)
        total_thetas.append(theta)
        filtered_means.append(np.mean(theta, axis=0)[0])
        filtered_covs.append(np.var(theta, axis=0)[0])

    return_list = filtered_means + filtered_covs
    myList = ','.join(map(str, np.array(total_thetas).flatten()))
    print(myList)

def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                count = 0
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == u, f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[count].params[1:])
                        ar_resid.append(ar_mod[count].resid)
                        count += 1
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                for f in range(self.n_features):
                    ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == mf, f]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[f].params[1:])
                    ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = \
                    np.zeros((self.n_unique, self.n_features, self.n_features))
                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / (np.var(X[kmmod.labels_ == u]))
                    else:
                        precision_init[u] = np.linalg.inv(
                            np.cov(np.transpose(X[kmmod.labels_ == u])))
                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)

def empirical_l2_reg(images, hdims):
    l2 = init_gmlp(hdims, images.shape[1], 1, scale=0.)
    W_1, b_1 = l2[0]
    W_1[:] = 1. / (0.001 + np.var(images, axis=0)[:, None])
    return flatten(l2)[0]
