def __init__(self, x, y, alpha=0., sigma=None, lamb=None, kernel_num=100):
    self.__x = transform_data(x)
    self.__y = transform_data(y)

    if self.__x.shape[1] != self.__y.shape[1]:
        raise ValueError("x and y must have the same dimensions.")

    if sigma is None:
        sigma = np.logspace(-4, 9, 14)
    if lamb is None:
        lamb = np.logspace(-4, 9, 14)

    self.__x_num_row = self.__x.shape[0]
    self.__y_num_row = self.__y.shape[0]
    # The kernel number is capped by the number of rows in x.
    self.__kernel_num = min(kernel_num, self.__x_num_row)
    # Randomly choose candidates for the RBF kernel centroids.
    self.__centers = np.array(rand.sample(list(self.__x), k=self.__kernel_num))
    self.__n_minimum = min(self.__x_num_row, self.__y_num_row)
    self.__kernel = jit(partial(gauss_kernel, centers=self.__centers))

    self._RuLSIF(
        x=self.__x,
        y=self.__y,
        alpha=alpha,
        s_sigma=np.atleast_1d(sigma),
        s_lambda=np.atleast_1d(lamb),
    )
def __init__(self, x, y, alpha=0., sigma=None, lamb=None, kernel_num=100):
    """Initialize the RuLSIF density-ratio estimator.

    Args:
        x (array-like of float): Numerator samples array. x is generated from p(x).
        y (array-like of float): Denominator samples array. y is generated from q(x).
        alpha (float or array-like, optional): The alpha is a parameter that can
            adjust the mixing ratio r(x) = p(x)/(alpha*p(x)+(1-alpha)q(x)), and is
            set in the range of 0-1. Defaults to 0.
        sigma (float or array-like, optional): Bandwidth of kernel. If a value is
            set for sigma, that value is used for the kernel bandwidth, and if a
            numerical array is set for sigma, Densratio selects the optimum value
            by using CV. Defaults to an array of 10^-3 to 10^1 divided into 9 on
            the log scale.
        lamb (float or array-like, optional): Regularization parameter. If a value
            is set for lamb, that value is used for the hyperparameter, and if a
            numerical array is set for lamb, Densratio selects the optimum value
            by using CV. Defaults to an array of 10^-3 to 10^1 divided into 9 on
            the log scale.
        kernel_num (int, optional): The number of kernels in the linear model.
            Defaults to 100.

    Raises:
        ValueError: If x and y have different dimensions.
    """
    self.__x = transform_data(x)
    self.__y = transform_data(y)

    if self.__x.shape[1] != self.__y.shape[1]:
        raise ValueError("x and y must have the same dimensions.")

    if sigma is None:
        sigma = np.logspace(-3, 1, 9)
    if lamb is None:
        lamb = np.logspace(-3, 1, 9)

    self.__x_num_row = self.__x.shape[0]
    self.__y_num_row = self.__y.shape[0]
    # The kernel number is capped by the number of rows in x.
    self.__kernel_num = min(kernel_num, self.__x_num_row)
    # Randomly choose candidates for the RBF kernel centroids.
    self.__centers = np.array(rand.sample(list(self.__x), k=self.__kernel_num))
    self.__n_minimum = min(self.__x_num_row, self.__y_num_row)
    # self.__kernel = jit(partial(gauss_kernel, centers=self.__centers))

    self._RuLSIF(
        x=self.__x,
        y=self.__y,
        alpha=alpha,
        s_sigma=np.atleast_1d(sigma),
        s_lambda=np.atleast_1d(lamb),
    )
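# Hedged illustration of the alpha-relative ratio from the docstring above
# (densities chosen for the example, not from the source). With p = N(0, 1)
# and q = N(1, 1), r_alpha(x) = p(x) / (alpha*p(x) + (1-alpha)*q(x)) is
# bounded above by 1/alpha, which is what makes alpha > 0 stabilise the
# estimate in regions where q(x) vanishes.
import numpy as np
from scipy.stats import norm

x = np.linspace(-3.0, 3.0, 7)
p, q = norm.pdf(x, 0.0, 1.0), norm.pdf(x, 1.0, 1.0)
for alpha in (0.1, 0.5):
    r = p / (alpha * p + (1 - alpha) * q)
    assert np.all(r <= 1.0 / alpha + 1e-9)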
def cts_lr_fields(mdp):
    """How does the learning rate change the vector field?"""
    n = 3
    lrs = np.logspace(-5, 0, n * n)

    pis = gen_grid_policies(N=31)
    vs = polytope(mdp.P, mdp.r, mdp.discount, pis)
    qs = [np.einsum('ijk,i->jk', mdp.P, v) for v in vs]
    many_cores = fitted_cores(mdp, qs)

    plt.figure(figsize=(16, 16))
    plt.title('PVI')
    for i, lr in enumerate(lrs):
        dpvis = pvi_vector_field(mdp, many_cores, lr)
        # Don't expect VI to change with the lr.
        # dvis = vi_vector_field(mdp, qs, lr)

        plt.subplot(n, n, i + 1)
        plt.title('lr: {:.3f}'.format(lr))
        plt_field(vs, dpvis)
        # plt.title('Parameterised VI')
        # plt.savefig('figs/lr_limit_{:.3f}.png'.format(lr))

    plt.savefig('traj-figs/lr_limit_pvi.png', dpi=300)
def pressure_layer(logPtop=-8., logPbtm=2., NP=20, mode='ascending'):
    """Generate the pressure layers.

    Args:
        logPtop: log10(P[bar]) at the top layer
        logPbtm: log10(P[bar]) at the bottom layer
        NP: the number of layers
        mode: 'ascending' or 'descending' pressure ordering

    Returns:
        Parr: pressure layer
        dParr: delta pressure layer
        k: k-factor; P[i-1] = k * P[i]

    Note:
        dParr[i] = Parr[i] - Parr[i-1], dParr[0] = (1-k) * Parr[0] for
        ascending mode.
    """
    dlogP = (logPbtm - logPtop) / (NP - 1)
    k = 10**-dlogP
    Parr = jnp.logspace(logPtop, logPbtm, NP)
    dParr = (1.0 - k) * Parr
    if mode == 'descending':
        Parr = Parr[::-1]
        dParr = dParr[::-1]
    return jnp.array(Parr), jnp.array(dParr), k
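# Hedged usage sketch for pressure_layer above (values illustrative). On an
# ascending grid the returned k satisfies Parr[i-1] = k * Parr[i], and dParr
# reproduces the forward differences from the docstring.
import jax.numpy as jnp

Parr, dParr, k = pressure_layer(logPtop=-8.0, logPbtm=2.0, NP=20)
assert jnp.allclose(Parr[:-1], k * Parr[1:], rtol=1e-5)
assert jnp.allclose(dParr[1:], Parr[1:] - Parr[:-1], rtol=1e-5)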
def _growth_factor_gamma(cosmo, a, log10_amin=-3, steps=128):
    r"""Computes the growth factor by integrating the growth rate provided by
    the \gamma parametrization. Normalized such that D(a=1) = 1.

    Parameters
    ----------
    a: array_like
        Scale factor

    log10_amin: float
        log10 of the minimum scale factor, default -3

    Returns
    -------
    D: ndarray, or float if input scalar
        Growth factor computed at requested scale factor
    """
    # Check if growth has already been computed; if not, compute it.
    if not "background.growth_factor" in cosmo._workspace.keys():
        # Compute tabulated array
        atab = np.logspace(log10_amin, 0.0, steps)

        def integrand(y, loga):
            xa = np.exp(loga)
            return _growth_rate_gamma(cosmo, xa)

        gtab = np.exp(odeint(integrand, np.log(atab[0]), np.log(atab)))
        gtab = gtab / gtab[-1]  # Normalize to a=1.
        cache = {"a": atab, "g": gtab}
        cosmo._workspace["background.growth_factor"] = cache
    else:
        cache = cosmo._workspace["background.growth_factor"]
    return np.clip(interp(a, cache["a"], cache["g"]), 0.0, 1.0)
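# Hedged note on the integration above (standard result, not from the
# source): in the gamma parametrization the growth rate is
# f(a) = dlnD/dlna, so D(a) = exp(integral of f dln a). That is why the ODE
# integrates _growth_rate_gamma in log(a), exponentiates the result, and
# normalises the table so that D(a=1) = 1.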
def tune_lr(method_id, method_params, problem_id, problem_params):
    loss = lambda a, b: np.sum((a - b)**2)
    optimizer = method_params['optimizer']
    search_space = {'optimizer': []}  # parameters for the method

    # Search learning rates from 10^lr_start down to 10^lr_stop.
    lr_start, lr_stop = -1, -3
    learning_rates = np.logspace(lr_start, lr_stop,
                                 1 + 2 * np.abs(lr_start - lr_stop))
    for lr in learning_rates:
        # Create an optimizer instance for each learning rate and append it.
        search_space['optimizer'].append(optimizer(learning_rate=lr))

    trials, min_steps = None, 100
    hpo = GridSearch()  # hyperparameter optimizer
    # Run each model for at least min_steps steps.
    optimal_params, optimal_loss = hpo.search(method_id, method_params,
                                              problem_id, problem_params,
                                              loss, search_space,
                                              trials=trials, smoothing=10,
                                              min_steps=min_steps, verbose=0)
    return optimal_params
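# A minimal sketch of the grid produced above (illustrative, plain NumPy):
# 1 + 2*|lr_start - lr_stop| points space the exponents by half a decade.
import numpy as np

lrs = np.logspace(-1, -3, 1 + 2 * abs(-1 - (-3)))
assert np.allclose(np.log10(lrs), [-1.0, -1.5, -2.0, -2.5, -3.0])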
def test_gaussian_log_likelihood():
    n_ell = 5
    ell = jnp.logspace(1, 3, n_ell)
    nz1 = smail_nz(1.0, 2.0, 1.0)
    nz2 = smail_nz(1.0, 2.0, 0.5)
    n_cls = 3
    P = [probes.NumberCounts([nz1, nz2], constant_linear_bias(1.0))]
    cosmo = Planck15()

    mu, cov_sparse = gaussian_cl_covariance_and_mean(cosmo, ell, P, sparse=True)
    cov_dense = to_dense(cov_sparse)
    data = 1.1 * mu

    for include_logdet in (True, False):
        loglike_sparse = gaussian_log_likelihood(
            data, mu, cov_sparse, include_logdet=include_logdet)
        for method in ('inverse', 'cholesky'):
            loglike_dense = gaussian_log_likelihood(
                data, mu, cov_dense,
                include_logdet=include_logdet,
                inverse_method=method,
            )
            assert_allclose(loglike_sparse, loglike_dense, rtol=1e-6)
def test_grid_search_lstm(show=False):
    problem_id = "SP500-v0"
    method_id = "LSTM"
    problem_params = {}  # e.g. {'p': 4, 'q': 1} for an ARMA problem
    method_params = {'n': 1, 'm': 1}
    loss = lambda a, b: np.sum((a - b)**2)
    search_space = {
        'l': [3, 4, 5, 6],
        'h': [2, 5, 8],
        'optimizer': []
    }  # parameters for the LSTM method

    opts = [Adam, Adagrad, ONS, OGD]
    # Search learning rates from 10^lr_start down to 10^lr_stop.
    lr_start, lr_stop = -1, -3
    learning_rates = np.logspace(lr_start, lr_stop,
                                 1 + 2 * np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        # Create an optimizer instance for each (optimizer, lr) pair.
        search_space['optimizer'].append(opt(learning_rate=lr))

    trials, min_steps = 10, 100
    hpo = GridSearch()  # hyperparameter optimizer
    # Run each model for at least min_steps steps.
    optimal_params, optimal_loss = hpo.search(method_id, method_params,
                                              problem_id, problem_params,
                                              loss, search_space,
                                              trials=trials, smoothing=10,
                                              min_steps=min_steps,
                                              verbose=show)
    if show:
        print("optimal params: ", optimal_params)
        print("optimal loss: ", optimal_loss)

    # Test the resulting method params.
    method = tigerforecast.method(method_id)
    method.initialize(**optimal_params)
    problem = tigerforecast.problem(problem_id)
    x = problem.initialize(**problem_params)

    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = method.predict(x)
        y_true = problem.step()
        loss.append(mse(y_pred, y_true))
        method.update(y_true)
        x = y_true

    if show:
        print("plot results")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
def test_comparison_hjert_scipy():
    Na = 300
    vl = -3
    vm = 5
    xarrv = jnp.logspace(vl, vm, Na)
    xarr = xarrv[:, None] * jnp.ones((Na, Na))
    aarrv = jnp.logspace(vl, vm, Na)
    aarr = aarrv[None, :] * jnp.ones((Na, Na))

    # scipy reference: H(a, x) = Re[w(x + i*a)] via the Faddeeva function
    def H(a, x):
        z = x + (1j) * a
        w = sc_wofz(z)
        return w.real

    # hjert, vectorized over x and then over a
    def vhjert(a):
        return vmap(hjert, (0, None), 0)(xarrv, a)

    vvhjert = jit(vmap(vhjert, 0, 0))
    diffarr = (vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr)
    print('MEDIAN=', np.median(diffarr), 'MAX=', np.max(diffarr))

    # figure
    import matplotlib.pyplot as plt

    fig = plt.figure()
    ax = fig.add_subplot(111)
    c = ax.imshow((vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr),
                  vmin=-1.e-6, vmax=1.e-6, cmap='RdBu',
                  extent=([vl, vm, vm, vl]), rasterized=True)
    plt.gca().invert_yaxis()
    plt.ylabel(r'$\log_{10}(x)$')
    plt.xlabel(r'$\log_{10}(a)$')
    cb = plt.colorbar(c)
    cb.formatter.set_powerlimits((0, 0))
    cb.set_label('(hjert - scipy)/scipy', size=14)
    plt.savefig('hjert.png', bbox_inches='tight', pad_inches=0.0)
    plt.savefig('hjert.pdf', bbox_inches='tight', pad_inches=0.0)

    assert np.max(diffarr) < 1.e-6
def test_vterm():
    g = 980.            # gravitational acceleration [cm/s2]
    drho = 1.0          # density difference [g/cm3]
    rho = 1.29 * 1.e-3  # atmospheric density [g/cm3]
    vfactor, Tr = viscosity.calc_vfactor(atm='Air')
    eta = viscosity.eta_Rosner(300.0, vfactor)
    r = jnp.logspace(-5, 0, 70)
    vfall = vterm.vf(r, g, eta, drho, rho)
    # Compare against the reference mean terminal velocity; the original
    # one-sided comparison is tightened to an absolute difference.
    assert jnp.abs(jnp.mean(vfall) - 328.12296) < 1.e-5
def ell_binning():
    # We put this here to make sure it's used consistently.
    # Plausible limits, I guess.
    ell_max = 2000
    n_ell = 100
    # Choose ell bins from 100 .. 2000, log spaced.
    ell_edges = np.logspace(2, np.log10(ell_max), n_ell + 1)
    ell = 0.5 * (ell_edges[1:] + ell_edges[:-1])
    delta_ell = ell_edges[1:] - ell_edges[:-1]
    return ell, delta_ell
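# Hedged usage check for ell_binning above (illustrative): the bin widths
# tile the edge span 100 .. ell_max exactly.
import numpy as np

ell, delta_ell = ell_binning()
assert ell.shape == (100,) and delta_ell.shape == (100,)
assert np.isclose(delta_ell.sum(), 2000 - 100)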
def _halofit_parameters(cosmo, a, transfer_fn):
    r"""Computes the non-linear scale, effective spectral index,
    and spectral curvature.
    """
    # Step 1: Find the non-linear scale for which sigma(R) = 1.
    # That's our search range for the non-linear scale.
    r = np.logspace(-3, 1, 256)

    @jax.vmap
    def R_nl(a):
        def int_sigma(logk):
            k = np.exp(logk)
            y = np.outer(k, r)
            pk = linear_matter_power(cosmo, k, transfer_fn=transfer_fn)
            g = bkgrd.growth_factor(cosmo, np.atleast_1d(a))
            return (
                np.expand_dims(pk * k**3, axis=1)
                * np.exp(-(y**2))
                / (2.0 * np.pi**2)
                * g**2
            )

        sigma = simps(int_sigma, np.log(1e-4), np.log(1e4), 256)
        root = interp(np.atleast_1d(1.0), sigma, r)
        return root

    # Compute the non-linear scale.
    k_nl = 1.0 / R_nl(np.atleast_1d(a)).squeeze()

    # Step 2: Retrieve the spectral index and spectral curvature.
    def integrand(logk):
        k = np.exp(logk)
        y = np.outer(k, 1.0 / k_nl)
        pk = linear_matter_power(cosmo, k, transfer_fn=transfer_fn)
        g = np.expand_dims(bkgrd.growth_factor(cosmo, np.atleast_1d(a)), 0)
        res = (
            np.expand_dims(pk * k**3, axis=1)
            * np.exp(-(y**2))
            * g**2
            / (2.0 * np.pi**2)
        )
        dneff_dlogk = 2 * res * y**2
        dC_dlogk = 4 * res * (y**2 - y**4)
        return np.stack([dneff_dlogk, dC_dlogk], axis=1)

    res = simps(integrand, np.log(1e-4), np.log(1e4), 256)

    n_eff = res[0] - 3.0
    C = res[0] ** 2 + res[1]
    return k_nl, n_eff, C
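# Hedged note (halofit conventions, Smith et al. 2003): Step 1 solves
# sigma^2(R) = integral dlnk [k^3 P(k) / (2 pi^2)] exp(-k^2 R^2) = 1 for R,
# giving the non-linear scale k_nl = 1 / R_nl; Step 2 evaluates the first
# and second log-derivatives of sigma^2 at R_nl, from which n_eff and the
# curvature C are assembled.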
def test_comparison_hjert_scipy():
    Na = 300
    vl = -3
    vm = 5
    xarrv = jnp.logspace(vl, vm, Na)
    xarr = xarrv[:, None] * jnp.ones((Na, Na))
    aarrv = jnp.logspace(vl, vm, Na)
    aarr = aarrv[None, :] * jnp.ones((Na, Na))

    # scipy reference
    def H(a, x):
        z = x + (1j) * a
        w = sc_wofz(z)
        return w.real

    # hjert, vectorized over x and then over a
    def vhjert(a):
        return vmap(hjert, (0, None), 0)(xarrv, a)

    vvhjert = jit(vmap(vhjert, 0, 0))
    diffarr = (vvhjert(aarrv).T - H(aarr, xarr)) / H(aarr, xarr)
    assert np.max(diffarr) < 1.e-6
def test_cubic_spline():
    # We sample some irregularly spaced points.
    x = np.logspace(-2, 1, 64)
    y = _testing_function(x)

    spl = InterpolatedUnivariateSpline(x, y, k=3)
    spl_ref = RefSpline(x, y, k=3)

    # Vector of points at which to interpolate; note that this goes outside
    # of the interpolation data, so we are also testing extrapolation.
    t = np.linspace(-1, 11, 128)
    assert_allclose(spl_ref(t), spl(t), rtol=1e-10)

    # Test the antiderivative, up to an integration constant.
    a = spl_ref.antiderivative()(t) - spl_ref.antiderivative()(0.01)
    b = spl.antiderivative(t) - spl.antiderivative(0.01)
    assert_allclose(a, b, rtol=1e-10)
def radial_comoving_distance(cosmo, a, log10_amin=-3, steps=256):
    r"""Radial comoving distance in [Mpc/h] for a given scale factor.

    Parameters
    ----------
    a : array_like
        Scale factor

    Returns
    -------
    chi : ndarray, or float if input scalar
        Radial comoving distance corresponding to the specified scale factor.

    Notes
    -----
    The radial comoving distance is computed by performing the following
    integration:

    .. math::

        \chi(a) = R_H \int_a^1 \frac{da^\prime}{{a^\prime}^2 E(a^\prime)}
    """
    # Check if distances have already been computed.
    if not "background.radial_comoving_distance" in cosmo._workspace.keys():
        # Compute tabulated array.
        atab = np.logspace(log10_amin, 0.0, steps)

        def dchioverdlna(y, x):
            xa = np.exp(x)
            return dchioverda(cosmo, xa) * xa

        chitab = odeint(dchioverdlna, 0.0, np.log(atab))
        chitab = chitab[-1] - chitab

        cache = {"a": atab, "chi": chitab}
        cosmo._workspace["background.radial_comoving_distance"] = cache
    else:
        cache = cosmo._workspace["background.radial_comoving_distance"]

    a = np.atleast_1d(a)
    # Return the results as an interpolation of the table.
    return np.clip(interp(a, cache["a"], cache["chi"]), 0.0)
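# Hedged note on the change of variables above (not from the source): the
# table is integrated in x = ln(a), and dchi/dlna = a * dchi/da, which is
# why dchioverdlna multiplies dchioverda(cosmo, xa) by xa = exp(x). The
# subtraction chitab[-1] - chitab then turns the antiderivative into
# chi(a) = integral from a to 1, so that chi(a=1) = 0.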
def testNTKPredCovPosDef(self, train_shape, test_shape, network, out_logits):
    key = random.PRNGKey(0)
    key, split = random.split(key)
    x_train = np.cos(random.normal(split, train_shape))

    key, split = random.split(key)
    y_train = np.array(
        random.bernoulli(split, shape=(train_shape[0], out_logits)),
        np.float32)
    key, split = random.split(key)
    x_test = np.cos(random.normal(split, test_shape))
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    reg = 1e-7
    ntk_predictions = predict.gradient_descent_mse_gp(
        ker_fun, x_train, y_train, x_test, diag_reg=reg, get='ntk',
        compute_cov=True)

    ts = np.logspace(-2, 8, 10)
    ntk_cov_predictions = [ntk_predictions(t).covariance for t in ts]

    if xla_bridge.get_backend().platform == 'tpu':
        eigh = np.onp.linalg.eigh
    else:
        eigh = np.linalg.eigh

    check_symmetric = np.array(
        [np.max(np.abs(cov - cov.T)) for cov in ntk_cov_predictions])
    check_pos_evals = np.min(
        np.array([eigh(cov)[0] + 1e-10 for cov in ntk_cov_predictions]))

    self.assertAllClose(check_symmetric, np.zeros_like(check_symmetric), True)
    self.assertGreater(check_pos_evals, 0., True)
def test_grid_search_arma(show=False):
    environment_id = "LDS"
    controller_id = "GPC"
    environment_params = {'n': 3, 'm': 2}
    controller_params = {}
    loss = lambda a, b: np.sum((a - b)**2)
    search_space = {'optimizer': []}  # parameters for the GPC controller

    opts = [Adam, Adagrad, ONS, OGD]
    # Search learning rates from 10^lr_start down to 10^lr_stop.
    lr_start, lr_stop = 0, -4
    learning_rates = np.logspace(lr_start, lr_stop,
                                 1 + 2 * np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        # Create an optimizer instance for each (optimizer, lr) pair.
        search_space['optimizer'].append(opt(learning_rate=lr))

    trials = 15
    hpo = GridSearch()  # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(controller_id,
                                              controller_params,
                                              environment_id,
                                              environment_params, loss,
                                              search_space, trials=trials,
                                              smoothing=10, start_steps=100,
                                              verbose=show)
    if show:
        print("optimal loss: ", optimal_loss)
        print("optimal params: ", optimal_params)

    # Test the resulting controller params.
    controller = tigercontrol.controllers(controller_id)
    controller.initialize(**optimal_params)
    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)

    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
def testPredCovPosDef(self, train_shape, test_shape, network, out_logits):
    _, x_test, x_train, y_train = self._get_inputs(out_logits, test_shape,
                                                   train_shape)
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    ts = np.logspace(-3, 3, 10)
    predict_fn_mse_ens = predict.gradient_descent_mse_ensemble(
        ker_fun, x_train, y_train)

    for get in ('nngp', 'ntk'):
        for x in (None, 'x_test'):
            for t in (None, 'ts'):
                with self.subTest(get=get, x=x, t=t):
                    cov = predict_fn_mse_ens(
                        t=t if t is None else ts,
                        get=get,
                        x_test=x if x is None else x_test,
                        compute_cov=True).covariance

                    self.assertAllClose(cov, np.moveaxis(cov, -1, -2))
                    self.assertGreater(np.min(np.linalg.eigh(cov)[0]), -1e-4)
def growth_factor(cosmo, a, log10_amin=-3, steps=100, eps=1e-4):
    """Compute the growth factor at a given scale factor, normalised such
    that G(a=1) = 1.

    Parameters
    ----------
    a: array_like
        Scale factor

    log10_amin: float
        log10 of the minimum scale factor, default -3

    Returns
    -------
    G: ndarray, or float if input scalar
        Growth factor computed at requested scale factor
    """
    # Check if growth has already been computed.
    if not 'background.growth_factor' in cosmo._workspace.keys():
        # Compute tabulated array.
        atab = np.logspace(log10_amin, 0., steps)

        def D_derivs(y, x, cosmo):
            q = (2.0 - 0.5 *
                 (Omega_m_a(cosmo, x) +
                  (1.0 + 3.0 * w(cosmo, x)) * Omega_de_a(cosmo, x))) / x
            r = 1.5 * Omega_m_a(cosmo, x) / x / x
            return [y[1], -q * y[1] + r * y[0]]

        y0 = [atab[0], 1.0]
        y1, y2 = odeint(D_derivs, y0, atab, cosmo)
        gtab = y1 / y1[-1]

        cache = {'a': atab, 'g': gtab}
        cosmo._workspace['background.growth_factor'] = cache
    else:
        cache = cosmo._workspace['background.growth_factor']

    a = np.clip(np.atleast_1d(a), 10.**log10_amin, 1.0 - eps)
    return np.clip(interp(a, cache['a'], cache['g']), 0., 1.0)
def _growth_factor_ODE(cosmo, a, log10_amin=-3, steps=128, eps=1e-4):
    """Compute the linear growth factor D(a) at a given scale factor,
    normalised such that D(a=1) = 1.

    Parameters
    ----------
    a: array_like
        Scale factor

    log10_amin: float
        log10 of the minimum scale factor, default -3

    Returns
    -------
    D: ndarray, or float if input scalar
        Growth factor computed at requested scale factor
    """
    # Check if growth has already been computed.
    if not "background.growth_factor" in cosmo._workspace.keys():
        # Compute tabulated array.
        atab = np.logspace(log10_amin, 0.0, steps)

        def D_derivs(y, x):
            q = (2.0 - 0.5 *
                 (Omega_m_a(cosmo, x) +
                  (1.0 + 3.0 * w(cosmo, x)) * Omega_de_a(cosmo, x))) / x
            r = 1.5 * Omega_m_a(cosmo, x) / x / x
            return np.array([y[1], -q * y[1] + r * y[0]])

        y0 = np.array([atab[0], 1.0])
        y = odeint(D_derivs, y0, atab)
        y1 = y[:, 0]
        gtab = y1 / y1[-1]

        # To transform from dD/da to dlnD/dlna: dlnD/dlna = (a / D) * dD/da
        ftab = y[:, 1] / y1[-1] * atab / gtab
        cache = {"a": atab, "g": gtab, "f": ftab}
        cosmo._workspace["background.growth_factor"] = cache
    else:
        cache = cosmo._workspace["background.growth_factor"]
    return np.clip(interp(a, cache["a"], cache["g"]), 0.0, 1.0)
def test_sparse_cov():
    n_ell = 25
    ell = jnp.logspace(1, 3, n_ell)
    nz1 = smail_nz(1.0, 2.0, 1.0)
    nz2 = smail_nz(1.0, 2.0, 0.5)
    n_cls = 3
    P = [probes.NumberCounts([nz1, nz2], constant_linear_bias(1.0))]
    cl_signal = jnp.ones((n_cls, n_ell))
    cl_noise = jnp.ones_like(cl_signal)

    cov_dense = gaussian_cl_covariance(ell, P, cl_signal, cl_noise,
                                       sparse=False)
    cov_sparse = gaussian_cl_covariance(ell, P, cl_signal, cl_noise,
                                        sparse=True)

    assert cov_sparse.shape == (n_cls, n_cls, n_ell)
    assert_array_equal(to_dense(cov_sparse), cov_dense)
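# Hedged note (assumes the jax_cosmo sparse convention): because different
# ell modes are uncorrelated, the Gaussian covariance is block-diagonal in
# ell, so the sparse format stores only the (n_cls, n_cls, n_ell) diagonals
# and to_dense expands them into the full (n_cls * n_ell, n_cls * n_ell)
# matrix checked against cov_dense above.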
N = 1500
nus, wav, res = nugrid(22900, 22960, N, unit='AA')
# mdbM = moldb.MdbExomol('.database/CO/12C-16O/Li2015', nus)  # loading molecular database
# molmassM = molinfo.molmass("CO")  # molecular mass (CO)
mdbM = moldb.MdbExomol('.database/H2O/1H2-16O/POKAZATEL', nus,
                       crit=1.e-45)  # loading molecular database
molmassM = molinfo.molmass('H2O')  # molecular mass (H2O)

q = mdbM.qr_interp(1500.0)
S = SijT(1500.0, mdbM.logsij0, mdbM.nu_lines, mdbM.elower, q)
mask = S > 1.e-25
mdbM.masking(mask)

Tarr = jnp.logspace(jnp.log10(800), jnp.log10(1600), 100)
qt = vmap(mdbM.qr_interp)(Tarr)
SijM = jit(vmap(SijT, (0, None, None, None, 0)))(Tarr, mdbM.logsij0,
                                                 mdbM.nu_lines, mdbM.elower,
                                                 qt)
imax = jnp.argmax(SijM, axis=0)
Tmax = Tarr[imax]
print(jnp.min(Tmax))

pl = planck.piBarr(jnp.array([1100.0, 1000.0]), nus)
print(pl[1] / pl[0])
pl = planck.piBarr(jnp.array([1400.0, 1200.0]), nus)
print(pl[1] / pl[0])
def logspace_epsilons(num_epsilons: int,
                      epsilon: float = 0.017) -> Sequence[float]:
    """`num_epsilons` logspace-distributed values, with median `epsilon`."""
    if num_epsilons <= 1:
        return (epsilon,)
    # With base = epsilon**(2/9), logspace(1, 8, n) spans epsilon**(2/9)
    # .. epsilon**(16/9); the central exponent is 4.5, so the geometric
    # middle of the grid is epsilon itself.
    return jnp.logspace(1, 8, num_epsilons, base=epsilon**(2. / 9.))
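# Hedged check of the median property (illustrative): for odd n the middle
# element of logspace(1, 8, n, base=epsilon**(2/9)) is
# epsilon**(2/9 * 4.5) = epsilon.
import jax.numpy as jnp

eps = jnp.array(logspace_epsilons(9, epsilon=0.017))
assert jnp.isclose(jnp.median(eps), 0.017, rtol=1e-5)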
def testNTK_NTKNNGPAgreement(self, train_shape, test_shape, network,
                             out_logits):
    key = random.PRNGKey(0)
    key, split = random.split(key)
    x_train = np.cos(random.normal(split, train_shape))

    key, split = random.split(key)
    y_train = np.array(
        random.bernoulli(split, shape=(train_shape[0], out_logits)),
        np.float32)
    key, split = random.split(key)
    x_test = np.cos(random.normal(split, test_shape))
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    reg = 1e-7
    prediction = predict.gradient_descent_mse_gp(
        ker_fun, x_train, y_train, x_test, diag_reg=reg, get='NTK',
        compute_cov=True)

    ts = np.logspace(-2, 8, 10)
    ntk_predictions = [prediction(t).mean for t in ts]

    # Create a hacked kernel function that always returns the ntk kernel.
    def always_ntk(x1, x2, get=('nngp', 'ntk')):
        out = ker_fun(x1, x2, get=('nngp', 'ntk'))
        if get == 'nngp' or get == 'ntk':
            return out.ntk
        else:
            return out._replace(nngp=out.ntk)

    ntk_nngp_prediction = predict.gradient_descent_mse_gp(
        always_ntk, x_train, y_train, x_test, diag_reg=reg, get='NNGP',
        compute_cov=True)
    ntk_nngp_predictions = [ntk_nngp_prediction(t).mean for t in ts]

    # Test that if you use the nngp equations with the ntk, you get the
    # same mean.
    self.assertAllClose(ntk_predictions, ntk_nngp_predictions, True)

    # Next, test that if you go through the NTK code path, but with only
    # the NNGP kernel, we recreate the NNGP dynamics.
    nngp_prediction = predict.gradient_descent_mse_gp(
        ker_fun, x_train, y_train, x_test, diag_reg=reg, get='NNGP',
        compute_cov=True)

    # Create a hacked kernel function that always returns the nngp kernel.
    def always_nngp(x1, x2, get=('nngp', 'ntk')):
        out = ker_fun(x1, x2, get=('nngp', 'ntk'))
        if get == 'nngp' or get == 'ntk':
            return out.nngp
        else:
            return out._replace(ntk=out.nngp)

    nngp_ntk_prediction = predict.gradient_descent_mse_gp(
        always_nngp, x_train, y_train, x_test, diag_reg=reg, get='NTK',
        compute_cov=True)

    nngp_cov_predictions = [nngp_prediction(t).covariance for t in ts]
    nngp_ntk_cov_predictions = [
        nngp_ntk_prediction(t).covariance for t in ts
    ]

    # Test that if you use the ntk equations with the nngp, you get the
    # same covariance. Although, due to accumulation of numerical errors,
    # only roughly.
    self.assertAllClose(nngp_cov_predictions, nngp_ntk_cov_predictions, True)
def _newton_update(weights_0, X, XX_T, target, k, method_, maxiter=int(1024),
                   ftol=1e-12, gtol=1e-8, reg_lambda=0.0, reg_mu=None,
                   ref_row=True, initializer=None, reg_format=None):
    L_list = [
        float(
            _objective(weights_0, X, XX_T, target, k, method_, reg_lambda,
                       reg_mu, ref_row, initializer, reg_format))
    ]

    weights = weights_0.copy()

    # TODO move this to the initialization
    if method_ is None:
        weights = jax_np.zeros_like(weights)

    for i in range(0, maxiter):
        gradient = _gradient(weights, X, XX_T, target, k, method_,
                             reg_lambda, reg_mu, ref_row, initializer,
                             reg_format)

        if jax_np.abs(gradient).sum() < gtol:
            break

        # FIXME hessian is occasionally NaN
        hessian = _hessian(weights, X, XX_T, target, k, method_, reg_lambda,
                           reg_mu, ref_row, initializer, reg_format)

        if method_ == 'FixDiag':
            updates = gradient / hessian
        else:
            try:
                # scipy.linalg.pinv2 was removed in SciPy 1.9; pinv computes
                # the same Moore-Penrose pseudo-inverse.
                inverse = scipy.linalg.pinv(hessian)
                updates = jax_np.matmul(inverse, gradient)
            except (np.linalg.LinAlgError, ValueError) as err:
                logging.error(err)
                updates = gradient

        # Backtracking line search over a coarse-to-fine step-size grid.
        for step_size in jax_np.hstack(
                (jax_np.linspace(1, 0.1, 10), jax_np.logspace(-2, -32, 31))):

            tmp_w = weights - (updates * step_size).ravel()

            if jax_np.any(jax_np.isnan(tmp_w)):
                logging.debug("{}: There are NaNs in tmp_w".format(method_))

            L = _objective(tmp_w, X, XX_T, target, k, method_, reg_lambda,
                           reg_mu, ref_row, initializer, reg_format)

            if (L - L_list[-1]) < 0:
                break

        L_list.append(float(L))

        logging.debug(
            "{}: after {} iterations log-loss = {:.7e}, sum_grad = {:.7e}".
            format(method_, i, L, jax_np.abs(gradient).sum()))

        if jax_np.isnan(L):
            logging.error("{}: log-loss is NaN".format(method_))
            break

        if i >= 5:
            if (float(np.min(np.diff(L_list[-5:]))) > -ftol) & \
                    (float(np.sum(np.diff(L_list[-5:])) > 0) == 0):
                weights = tmp_w.copy()
                logging.debug(
                    '{}: Terminating as the loss is no longer changing '
                    'enough.'.format(method_))
                break

        if (L_list[-1] - L_list[-2]) > 0:
            logging.debug('{}: Terminating as the loss increased {}.'.format(
                method_, jax_np.diff(L_list[-2:])))
            break
        else:
            weights = tmp_w.copy()

    L = _objective(weights, X, XX_T, target, k, method_, reg_lambda, reg_mu,
                   ref_row, initializer, reg_format)

    logging.debug(
        "{}: after {} iterations final log-loss = {:.7e}, sum_grad = {:.7e}".
        format(method_, i, L, jax_np.abs(gradient).sum()))

    return weights
def logspace(*args, **kwargs):
    return JaxArray(jnp.logspace(*args, **kwargs))
from functools import partial

from jax import grad, jit, random
import jax.numpy as jnp
import numpy as onp
import json_tricks as json

import utils
import stein
import kernels
import distributions
import models
import config as cfg

key = random.PRNGKey(0)

# Poorly conditioned Gaussian
d = 50
variances = jnp.logspace(-5, 0, num=d)
target = distributions.Gaussian(jnp.zeros(d), variances)
proposal = distributions.Gaussian(jnp.zeros(d), jnp.ones(d))


@partial(jit, static_argnums=1)
def get_sd(samples, fun):
    """Compute SD(samples, p) given the witness function fun."""
    return stein.stein_discrepancy(samples, target.logpdf, fun)


def kl_gradient(x):
    """Optimal witness function."""
    return grad(lambda x: target.logpdf(x) - proposal.logpdf(x))(x)
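# Hedged note (not from the source): with samples x ~ q (the proposal), the
# Stein discrepancy evaluated with the witness f(x) = grad(log p - log q)(x)
# equals the Fisher divergence E_q ||grad log p - grad log q||^2, which for
# q = N(0, I) and p = N(0, diag(v)) is sum_i (1/v_i - 1)^2. Assuming
# stein.stein_discrepancy implements the canonical Stein operator
# expectation, get_sd(samples, kl_gradient) should approach this value as
# the sample size grows:
samples = jnp.array(
    onp.random.default_rng(0).normal(size=(10_000, d)))  # draws from q
fisher_closed_form = jnp.sum((1.0 / variances - 1.0)**2)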
def main(_):
    b_mode = False
    std1 = jnp.expand_dims(fits.getdata(FLAGS.std1).astype('float32'), -1)
    std2 = jnp.expand_dims(fits.getdata(FLAGS.std2).astype('float32'), -1)
    sigma_gamma = jnp.concatenate([std1, std2], axis=-1)
    # fits.writeto("./sigma_gamma.fits", onp.array(sigma_gamma), overwrite=False)

    def log_likelihood(x, sigma, meas_shear, mask, sigma_mask):
        """Likelihood function at the level of the measured shear."""
        if b_mode:
            x = x.reshape((360, 360, 2))
            ke = x[..., 0]
            kb = x[..., 1]
        else:
            ke = x.reshape((360, 360))
            kb = jnp.zeros(ke.shape)
        model_shear = jnp.stack(ks93inv(ke, kb), axis=-1)
        return -jnp.sum((model_shear - meas_shear)**2 /
                        ((sigma_gamma)**2 + sigma**2 + sigma_mask)) / 2.

    likelihood_score = jax.vmap(jax.grad(log_likelihood),
                                in_axes=[0, 0, None, None, None])

    map_size = fits.getdata(FLAGS.mask).astype('float32').shape[0]

    # Make the network
    # model = hk.transform_with_state(forward_fn)
    model = hk.without_apply_rng(hk.transform_with_state(forward_fn))
    rng_seq = hk.PRNGSequence(42)
    params, state = model.init(next(rng_seq),
                               jnp.zeros((1, map_size, map_size, 2)),
                               jnp.zeros((1, 1, 1, 1)),
                               is_training=True)

    # Load the weights of the neural network
    if not FLAGS.gaussian_only:
        with open(FLAGS.model_weights, 'rb') as file:
            params, state, sn_state = pickle.load(file)
        residual_prior_score = partial(model.apply, params, state,
                                       next(rng_seq), is_training=True)

    pixel_size = jnp.pi * FLAGS.resolution / 180. / 60.  # rad/pixel

    # Load the prior power spectrum
    ps_data = onp.load(FLAGS.gaussian_path).astype('float32')
    ell = jnp.array(ps_data[0, :])  # 4th channel for massivenu
    # Normalisation by pixel size
    ps_halofit = jnp.array(ps_data[1, :] / pixel_size**2)
    # Convert to pixel units of our simple power spectrum calculator
    kell = ell / 2 / jnp.pi * 360 * pixel_size / map_size
    # Interpolate the power spectrum in Fourier space
    power_map = jnp.array(make_power_map(ps_halofit, map_size, kps=kell))

    # Load the noiseless convergence map
    if not FLAGS.COSMOS:
        convergence = fits.getdata(FLAGS.convergence).astype('float32')
        # Get the corresponding shear
        gamma1, gamma2 = ks93inv(convergence, onp.zeros_like(convergence))

        if not FLAGS.no_cluster:
            print('adding a cluster')
            # Compute the NFW profile shear map
            g1_NFW, g2_NFW = gen_nfw_shear(x_cen=FLAGS.x_cluster,
                                           y_cen=FLAGS.y_cluster,
                                           resolution=FLAGS.resolution,
                                           nx=map_size, ny=map_size,
                                           z=FLAGS.z_halo,
                                           m=FLAGS.mass_halo,
                                           zs=FLAGS.zs)
            # Shear with added NFW cluster
            gamma1 += g1_NFW
            gamma2 += g2_NFW
            # Target convergence map with the added cluster
            # ke_cluster, kb_cluster = ks93(g1_cluster, g2_cluster)

        # Add noise to the shear map
        if FLAGS.cosmos_noise_realisation:
            print('using the COSMOS noise realisation')
            gamma1 += fits.getdata(FLAGS.cosmos_noise_e1).astype('float32')
            gamma2 += fits.getdata(FLAGS.cosmos_noise_e2).astype('float32')
        else:
            gamma1 += std1[..., 0] * jax.random.normal(
                jax.random.PRNGKey(42), gamma1.shape)
            gamma2 += std2[..., 0] * jax.random.normal(
                jax.random.PRNGKey(43), gamma2.shape)

        # Shear is expected in the format [map_size, map_size, 2]
        gamma = onp.stack([gamma1, gamma2], -1)
    else:
        # Load the COSMOS shear maps
        g1 = fits.getdata('../data/COSMOS/cosmos_full_e1_0.29arcmin360.fits'
                          ).astype('float32').reshape([map_size, map_size, 1])
        g2 = fits.getdata('../data/COSMOS/cosmos_full_e2_0.29arcmin360.fits'
                          ).astype('float32').reshape([map_size, map_size, 1])
        gamma = onp.concatenate([g1, g2], axis=-1)

    # Corresponding mask; has shape [map_size, map_size, 1]
    mask = jnp.expand_dims(fits.getdata(FLAGS.mask).astype('float32'), -1)
    masked_true_shear = gamma * mask
    # fits.writeto("./input_shear.fits", onp.array(masked_true_shear), overwrite=False)
    sigma_mask = (1 - mask) * 1e10

    def score_fn(params, state, x, sigma, is_training=False):
        if b_mode:
            x = x.reshape((-1, 360, 360, 2))
            ke = x[..., 0]
            kb = x[..., 1]
        else:
            ke = x.reshape((-1, 360, 360))

        if FLAGS.gaussian_prior:
            # If requested, first compute the Gaussian prior
            gs = gaussian_prior_score(ke, sigma.reshape((-1, 1, 1)),
                                      power_map)
            gs = jnp.expand_dims(gs, axis=-1)
            net_input = jnp.concatenate([
                ke.reshape((-1, 360, 360, 1)),
                jnp.abs(sigma.reshape((-1, 1, 1, 1)))**2 * gs
            ], axis=-1)
            res, state = model.apply(params, state, net_input,
                                     sigma.reshape((-1, 1, 1, 1)),
                                     is_training=is_training)
            if b_mode:
                gsb = gaussian_prior_score_b(kb, sigma.reshape((-1, 1, 1)))
                gsb = jnp.expand_dims(gsb, axis=-1)
            else:
                gsb = jnp.zeros_like(res)
        else:
            res, state = model.apply(params, state,
                                     ke.reshape((-1, 360, 360, 1)),
                                     sigma.reshape((-1, 1, 1, 1)),
                                     is_training=is_training)
            gs = jnp.zeros_like(res)
            gsb = jnp.zeros_like(res)
        # `_` here is main's unused argv argument; callers only consume
        # res, gs, and gsb.
        return _, res, gs, gsb

    score_fn = partial(score_fn, params, state)

    def score_prior(x, sigma):
        if b_mode:
            _, res, gaussian_score, gsb = score_fn(
                x.reshape(-1, 360, 360, 2), sigma.reshape(-1, 1, 1, 1))
        else:
            _, res, gaussian_score, gsb = score_fn(
                x.reshape(-1, 360, 360), sigma.reshape(-1, 1, 1))
        ke = (res[..., 0:1] + gaussian_score).reshape(-1, 360 * 360)
        kb = gsb[..., 0].reshape(-1, 360 * 360)
        if b_mode:
            return jnp.stack([ke, kb], axis=-1)
        else:
            return ke

    def total_score_fn(x, sigma):
        if b_mode:
            sl = likelihood_score(x, sigma, masked_true_shear, mask,
                                  sigma_mask).reshape(-1, 360 * 360, 2)
        else:
            sl = likelihood_score(x, sigma, masked_true_shear, mask,
                                  sigma_mask).reshape(-1, 360 * 360)
        sp = score_prior(x, sigma)
        if b_mode:
            return (sl + sp).reshape(-1, 360 * 360 * 2)
        else:
            return (sl + sp).reshape(-1, 360 * 360)
        # return (sp).reshape(-1, 360*360, 2)

    # Prepare the input with a high noise level map
    initial_temperature = FLAGS.initial_temperature
    delta_tmp = initial_temperature  # onp.sqrt(initial_temperature**2 - 0.148**2)
    initial_step_size = FLAGS.initial_step_size  # 0.018
    min_steps_per_temp = FLAGS.min_steps_per_temp  # 10

    init_image, _ = ks93(mask[..., 0] * masked_true_shear[..., 0],
                         mask[..., 0] * masked_true_shear[..., 1])
    init_image = jnp.expand_dims(init_image, axis=0)
    init_image = jnp.repeat(init_image, FLAGS.batch_size, axis=0)
    init_image += delta_tmp * onp.random.randn(FLAGS.batch_size, 360, 360)

    def make_kernel_fn(target_log_prob_fn, target_score_fn, sigma):
        return ScoreHamiltonianMonteCarlo(
            target_log_prob_fn=target_log_prob_fn,
            target_score_fn=target_score_fn,
            step_size=initial_step_size *
            (jnp.max(sigma) / initial_temperature)**0.5,
            num_leapfrog_steps=3,
            num_delta_logp_steps=4)

    tmc = TemperedMC(
        target_score_fn=total_score_fn,  # score_prior
        inverse_temperatures=initial_temperature *
        jnp.ones([FLAGS.batch_size]),
        make_kernel_fn=make_kernel_fn,
        gamma=0.98,
        min_temp=8e-3,
        min_steps_per_temp=min_steps_per_temp,
        num_delta_logp_steps=4)

    num_burnin_steps = int(0)

    samples, trace = tfp.mcmc.sample_chain(
        num_results=2,  # FLAGS.num_steps
        current_state=init_image.reshape([FLAGS.batch_size, -1]),
        kernel=tmc,
        num_burnin_steps=num_burnin_steps,
        num_steps_between_results=6000,  # num_results // FLAGS.num_steps
        trace_fn=lambda _, pkr: (pkr.pre_tempering_results.is_accepted,
                                 pkr.post_tempering_inverse_temperatures,
                                 pkr.tempering_log_accept_ratio),
        seed=jax.random.PRNGKey(int(time.time())))

    sol = samples[-1, ...].reshape(-1, 360, 360)

    from scipy import integrate

    @jax.jit
    def dynamics(t, x):
        if b_mode:
            x = x.reshape([-1, 360, 360, 2])
            return -0.5 * total_score_fn(
                x, sigma=jnp.ones((FLAGS.batch_size, 1, 1, 1)) *
                jnp.sqrt(t)).reshape([-1])
        else:
            x = x.reshape([-1, 360, 360])
            return -0.5 * total_score_fn(
                x, sigma=jnp.ones((FLAGS.batch_size, 1, 1)) *
                jnp.sqrt(t)).reshape([-1])

    init_ode = sol
    last_trace = jnp.mean(trace[1][-1])
    noise = last_trace
    start_and_end_times = jnp.logspace(jnp.log10(0.99 * noise**2), -5,
                                       num=50)

    solution = integrate.solve_ivp(dynamics, [noise**2, 1e-5],
                                   init_ode.flatten(),
                                   t_eval=start_and_end_times)
    denoised = solution.y[:, -1].reshape([FLAGS.batch_size, 360, 360])

    fits.writeto("./results/" + FLAGS.output_folder + "/samples_hmc_" +
                 FLAGS.output_file + ".fits", onp.array(sol),
                 overwrite=False)
    fits.writeto("./results/" + FLAGS.output_folder + "/samples_denoised_" +
                 FLAGS.output_file + ".fits", onp.array(denoised),
                 overwrite=False)
    print('end of sampling')
vmap_ts_batch = 1
vmap_beta_batch = 1
vmap_sig_batch = 1
vmap_diag_batch = 1
num_of_gpus = -1
b_std = 0.
diag_min = -3
diag_max = 1
num_diag = 100
dig_reg = 1e-4
save_path = '/Volumes/ravidziv/info_ntk/logs/{}_results.csv'
run_metrics = ['losses', 'ixt', 'dkl_output']

train_images, train_labels, test_images, test_labels = load_data(
    dataset=dataset, train_size=train_size, test_size=test_size)

ts = np.logspace(ts_min, ts_max, num_ts)
# ts = np.array([1e20])
sigs = np.logspace(sigs_min, sigs_max, num_sigs)
# diag_regs = np.logspace(diag_min, diag_max, num_diag)
betas = np.logspace(beta_min, beta_max, num_betas)
# betas = np.array([0.])

metrics = {
    'losses': MetricsTuple(tuple(loss_metrics_name), get_losses,
                           args=tuple({})),
    'ixt': MetricsTuple(tuple(ixt_metrics_name), get_info_nec,
                        args=tuple({'num_of_samples': 2})),
    'dkl_output': MetricsTuple(tuple(dkl_metrics_name),
                               get_kl_posterior_prior,
def testNTK_NTKNNGPAgreement(self, train_shape, test_shape, network,
                             out_logits):
    _, x_test, x_train, y_train = self._get_inputs(out_logits, test_shape,
                                                   train_shape)
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    reg = 1e-7
    predictor = predict.gradient_descent_mse_ensemble(ker_fun, x_train,
                                                      y_train, diag_reg=reg)

    ts = np.logspace(-2, 8, 10).reshape((5, 2))

    for t in (None, 'ts'):
        for x in (None, 'x_test'):
            with self.subTest(t=t, x=x):
                x = x if x is None else x_test
                t = t if t is None else ts

                ntk = predictor(t=t, get='ntk', x_test=x)

                # Test time broadcasting
                if t is not None:
                    ntk_ind = np.array([
                        predictor(t=t, get='ntk', x_test=x)
                        for t in t.ravel()
                    ]).reshape(t.shape + ntk.shape[2:])
                    self.assertAllClose(ntk_ind, ntk)

                # Create a hacked kernel function that always returns the
                # ntk kernel.
                def always_ntk(x1, x2, get=('nngp', 'ntk')):
                    out = ker_fun(x1, x2, get=('nngp', 'ntk'))
                    if get == 'nngp' or get == 'ntk':
                        return out.ntk
                    else:
                        return out._replace(nngp=out.ntk)

                predictor_ntk = predict.gradient_descent_mse_ensemble(
                    always_ntk, x_train, y_train, diag_reg=reg)

                ntk_nngp = predictor_ntk(t=t, get='nngp', x_test=x)

                # Test that if you use the nngp equations with the ntk, you
                # get the same mean.
                self.assertAllClose(ntk, ntk_nngp)

                # Next, test that if you go through the NTK code path, but
                # with only the NNGP kernel, we recreate the NNGP dynamics.

                # Create a hacked kernel function that always returns the
                # nngp kernel.
                def always_nngp(x1, x2, get=('nngp', 'ntk')):
                    out = ker_fun(x1, x2, get=('nngp', 'ntk'))
                    if get == 'nngp' or get == 'ntk':
                        return out.nngp
                    else:
                        return out._replace(ntk=out.nngp)

                predictor_nngp = predict.gradient_descent_mse_ensemble(
                    always_nngp, x_train, y_train, diag_reg=reg)

                nngp_cov = predictor(t=t, get='nngp', x_test=x,
                                     compute_cov=True).covariance

                # Test time broadcasting for the covariance
                nngp_ntk_cov = predictor_nngp(t=t, get='ntk', x_test=x,
                                              compute_cov=True).covariance
                if t is not None:
                    nngp_ntk_cov_ind = np.array([
                        predictor_nngp(t=t, get='ntk', x_test=x,
                                       compute_cov=True).covariance
                        for t in t.ravel()
                    ]).reshape(t.shape + nngp_cov.shape[2:])
                    self.assertAllClose(nngp_ntk_cov_ind, nngp_ntk_cov)

                # Test that if you use the ntk equations with the nngp, you
                # get the same covariance. Although, due to accumulation of
                # numerical errors, only roughly.
                self.assertAllClose(nngp_cov, nngp_ntk_cov)