def test_constant_mean_fixed_variance(self):
    """Fit mean models with a FixedVariance volatility process.

    Checks the number of estimated parameters and whether a ``scale``
    parameter is present, both with and without ``unit_scale=True``.
    """

    def check(result, n_params, has_scale):
        # Shared assertions on the fitted parameter vector.
        assert len(result.params) == n_params
        assert ("scale" in result.params.index) == has_scale

    rng = RandomState(1234)
    variance = 2 + rng.standard_normal(self.y.shape[0]) ** 2.0
    std = np.sqrt(variance)
    shocks = rng.standard_normal(self.y_series.shape[0])
    y = pd.Series(std * shocks, index=self.y_series.index)

    # Constant mean: one mean parameter plus the scale.
    fitted = ConstantMean(y, volatility=FixedVariance(variance)).fit(disp=DISPLAY)
    fitted.summary()
    check(fitted, 2, True)

    # AR(3) with constant: four mean parameters plus the scale.
    model = ARX(self.y_series, lags=[1, 2, 3], volatility=FixedVariance(variance))
    check(model.fit(disp=DISPLAY), 5, True)

    # With unit_scale=True no scale parameter is estimated.
    model = ARX(
        self.y_series,
        lags=[1, 2, 3],
        volatility=FixedVariance(variance, unit_scale=True),
    )
    check(model.fit(disp=DISPLAY), 4, False)
def test_gauss_inv(self):
    """Check RandomState standard normals against ``gauss_from_uint``.

    Both stored data sets are checked with the same number of draws.
    """
    n = 25
    for dataset in (self.data1, self.data2):
        rs = RandomState(self.bit_generator(*dataset['seed']))
        gauss = rs.standard_normal(n)
        expected = gauss_from_uint(dataset['data'], n, 'dsfmt')
        assert_allclose(gauss, expected)
def test_gauss_inv(self):
    """Check RandomState standard normals against ``gauss_from_uint``.

    The conversion is parameterised by ``self.bits``; both stored data
    sets are checked with the same number of draws.
    """
    n = 25
    for dataset in (self.data1, self.data2):
        rs = RandomState(self.bit_generator(*dataset["seed"]))
        gauss = rs.standard_normal(n)
        expected = gauss_from_uint(dataset["data"], n, self.bits)
        assert_allclose(gauss, expected)
def bs_setup():
    """Build the shared bootstrap test data.

    Draws ``y`` (1000,), ``x`` (1000, 2) and ``z`` (1000, 1) in that order
    from a ``RandomState`` seeded with 1234 and returns them, together
    with pandas versions and a column-mean function, in a ``BSData``.
    """

    def func(y, axis=0):
        return y.mean(axis=axis)

    rng = RandomState(1234)
    nobs = 1000
    # The draw order (y, x, z) determines the values; keep it fixed.
    y = rng.standard_normal(nobs)
    x = rng.standard_normal((nobs, 2))
    z = rng.standard_normal((nobs, 1))
    return BSData(
        rng, x, y, z, pd.DataFrame(x), pd.Series(y), pd.DataFrame(z), func
    )
def exog_format(request):
    """Produce exogenous forecast inputs in the requested container format.

    ``request.param`` is ``(xtyp, shape, full)``. Returns ``(x_fcast, orig)``
    where ``orig`` is the raw random array and ``x_fcast`` is the same data,
    optionally NaN-padded to the length of ``SP500`` and wrapped as a pandas
    object or a dict. Unsupported combinations return ``(None, None)``.
    """
    xtyp, shape, full = request.param
    x_fcast = RandomState(123456).standard_normal(shape)
    orig = x_fcast.copy()
    nobs = SP500.shape[0]
    # Padding keeps the number of dimensions, so it can be read once.
    ndim = x_fcast.ndim

    if full:
        if ndim not in (2, 3):
            # No full 1d
            return None, None
        if ndim == 2:
            padded = np.full((nobs, shape[1]), np.nan)
            padded[-1:] = x_fcast
        else:
            padded = np.full((shape[0], nobs, shape[-1]), np.nan)
            padded[:, -1:] = x_fcast
        x_fcast = padded

    if xtyp == "pandas":
        if ndim == 3:
            return None, None
        x_fcast = pd.Series(x_fcast) if ndim == 1 else pd.DataFrame(x_fcast)
        x_fcast.index = SP500.index[-x_fcast.shape[0]:]
    elif xtyp == "dict":
        if ndim == 3:
            x_fcast = {f"x{i + 1}": x_fcast[i] for i in range(x_fcast.shape[0])}
        else:
            x_fcast = {"x1": x_fcast}
    return x_fcast, orig
def test_x_reformat_1var(exog_format):
    """Forecasts must not depend on the container format of ``x``.

    Input layouts supplied by the ``exog_format`` fixture:
      (10,), (1,10), (n,10), (1,1,10), (1,n,10),
      {"x1"}: (10,), {"x1"}: (1,10), {"x1"}: (n,10)
    """
    exog, ref = exog_format
    if exog is None:
        # Unsupported combination; nothing to check.
        return

    if isinstance(exog, dict):
        nexog = len(exog)
    elif np.ndim(exog) == 3:
        nexog = exog.shape[0]
    else:
        nexog = 1

    names = [f"x{i}" for i in range(1, nexog + 1)]
    draws = RandomState(12345).standard_normal((SP500.shape[0], nexog))
    x = pd.DataFrame(draws, columns=names, index=SP500.index)
    res = ARX(SP500, lags=1, x=x).fit()

    formatted = res.forecast(horizon=10, x=exog, reindex=False)
    reference = res.forecast(horizon=10, x=ref, reindex=False)
    assert_allclose(formatted.mean, reference.mean)
def test_x_forecasting(nexog):
    """Compare ARX-with-exog mean forecasts against a direct recursion.

    An AR(2) series is simulated, ``nexog`` random regressors are added,
    the model is refit and a 10-step forecast is requested with known
    future regressor values. The forecast mean must match the hand-rolled
    recursion built from the fitted parameters.
    """
    rng = RandomState(12345)
    mod = arch_model(None, mean="ARX", lags=2)
    data = mod.simulate([0.1, 1.2, -0.6, 0.1, 0.1, 0.8], nobs=1000)
    cols = [f"x{i}" for i in range(1, nexog + 1)]
    x = pd.DataFrame(
        rng.standard_normal((data.data.shape[0], nexog)),
        columns=cols,
        index=data.data.index,
    )
    b = np.array([0.25, 0.5]) if x.shape[1] == 2 else np.array([0.25])
    y = data.data + x @ b
    y.name = "y"
    mod = arch_model(y, x, mean="ARX", lags=2)
    res = mod.fit(disp="off")

    # Future regressors: variable i takes the values 100*i .. 100*i + 9.
    x_fcast = np.zeros((x.shape[1], 1, 10))
    for i in range(x_fcast.shape[0]):
        x_fcast[i] = np.arange(100 * i, 100 * i + 10)
    forecasts = res.forecast(x=x_fcast, horizon=10, reindex=False)

    direct = np.zeros(12)
    direct[:2] = y.iloc[-2:]
    # The parameter Series is label-indexed, so positional access must go
    # through .iloc; integer keys via [] are deprecated as positions.
    params = res.params
    p0, p1, p2 = params.iloc[:3]
    b0 = params.iloc[3]
    b1 = params.iloc[4] if x.shape[1] == 2 else 0.0
    for i in range(10):
        direct[i + 2] = p0 + p1 * direct[i + 1] + p2 * direct[i]
        direct[i + 2] += b0 * (i)
        direct[i + 2] += b1 * (100 + i)
    assert_allclose(forecasts.mean.iloc[0], direct[2:])
def test_normal(self):
    """Anderson-Darling test for normality on two seeded samples."""
    rs = RandomState(1234567890)
    # Draw order matters for reproducibility: exponential first, then normal.
    expon_sample = rs.standard_exponential(size=50)
    normal_sample = rs.standard_normal(size=50)

    # Exponential data: statistic is above all but the last critical value.
    statistic, critical, _ = stats.anderson(expon_sample)
    assert_array_less(critical[:-1], statistic)

    # Normal data: statistic is below the last two critical values.
    statistic, critical, _ = stats.anderson(normal_sample)
    assert_array_less(statistic, critical[-2:])
def test_normal(self):
    """Check ``stats.anderson`` (default distribution) on seeded samples.

    The exponential sample must exceed every critical value except the
    last; the normal sample must stay below the last two.
    """
    generator = RandomState(1234567890)
    samples = (
        generator.standard_exponential(size=50),
        generator.standard_normal(size=50),
    )

    a_stat, crit_vals, _sig = stats.anderson(samples[0])
    assert_array_less(crit_vals[:-1], a_stat)

    a_stat, crit_vals, _sig = stats.anderson(samples[1])
    assert_array_less(a_stat, crit_vals[-2:])
def check_expon(self):
    """Anderson-Darling test against the exponential distribution.

    The exponential sample must stay below the last two critical values;
    the normal sample must exceed every critical value except the last.
    """
    rs = RandomState(1234567890)
    # Draw order matters for reproducibility: exponential first, then normal.
    expon_sample = rs.standard_exponential(size=50)
    normal_sample = rs.standard_normal(size=50)

    statistic, critical, _ = scipy.stats.anderson(expon_sample, 'expon')
    assert_array_less(statistic, critical[-2:])

    statistic, critical, _ = scipy.stats.anderson(normal_sample, 'expon')
    assert_array_less(critical[:-1], statistic)
def signal(self):
    """
    Deliver the pink-noise signal.

    White Gaussian noise is drawn from a ``RandomState`` seeded with
    ``self.seed``; for each of ``depth`` levels a further noise sequence,
    held constant over blocks of ``2**(level+1)`` samples, is added.

    Returns
    -------
    Array of floats
        Signal of length ``self.numsamples``, scaled by
        ``self.rms / sqrt(depth + 1.5)``.
    """
    nums = self.numsamples
    depth = self.depth
    # maximum depth depending on number of samples
    max_depth = int(log(nums) / log(2))
    if depth > max_depth:
        depth = max_depth
        # Single-argument print call: same output on Python 2 and 3.
        print('Pink noise filter depth set to maximum possible value of %d.' % max_depth)

    rnd_gen = RandomState(self.seed)
    s = rnd_gen.standard_normal(nums)
    for level in range(depth):
        ind = 2**level - 1
        lind = nums - ind
        dind = 2**(level + 1)
        # Floor division keeps the sample count an integer on Python 3.
        s[ind:] += repeat(rnd_gen.standard_normal(nums // dind + 1), dind)[:lind]
    # divide by sqrt(depth+1.5) to get same overall level as white noise
    return self.rms / sqrt(depth + 1.5) * s
def signal(self):
    """
    Deliver the signal.

    Standard-normal samples are drawn from a generator seeded with
    ``self.seed`` and scaled by ``self.rms``.

    Returns
    -------
    Array of floats
        The resulting signal as an array of length
        :attr:`~SignalGenerator.numsamples`.
    """
    noise = RandomState(self.seed).standard_normal(self.numsamples)
    return self.rms * noise
def signal(self):
    """
    Deliver the signal.

    A fresh generator is seeded with ``self.seed`` on every call, so
    repeated calls return the same samples.

    Returns
    -------
    Array of floats
        The resulting signal as an array of length
        :attr:`~SignalGenerator.numsamples`.
    """
    generator = RandomState(self.seed)
    samples = generator.standard_normal(self.numsamples)
    return self.rms * samples
def signal(self):
    """
    Deliver the pink-noise signal.

    Starts from seeded white Gaussian noise and adds, for each of
    ``depth`` levels, a noise sequence that is repeated over blocks of
    ``2**(level+1)`` samples. ``depth`` is clamped to
    ``int(log2(numsamples))``.

    Returns
    -------
    Array of floats
        Signal of length ``self.numsamples``, scaled by
        ``self.rms / sqrt(depth + 1.5)``.
    """
    nums = self.numsamples
    # maximum depth depending on number of samples
    max_depth = int(log(nums) / log(2))
    depth = self.depth
    if depth > max_depth:
        depth = max_depth
        # Written as a single-argument call so it prints identically on
        # Python 2 and Python 3.
        print('Pink noise filter depth set to maximum possible value of %d.' % max_depth)

    rnd_gen = RandomState(self.seed)
    s = rnd_gen.standard_normal(nums)
    for level in range(depth):
        block = 2**(level + 1)
        start = 2**level - 1
        # Integer division: standard_normal needs an integer size.
        coarse = rnd_gen.standard_normal(nums // block + 1)
        s[start:] += repeat(coarse, block)[:nums - start]
    # divide by sqrt(depth+1.5) to get same overall level as white noise
    return self.rms / sqrt(depth + 1.5) * s
def test_expon(self):
    """Anderson-Darling test against the exponential distribution.

    An exponential sample must stay below the last two critical values;
    a normal sample must exceed the largest critical value.
    """
    rs = RandomState(1234567890)
    x1 = rs.standard_exponential(size=50)
    x2 = rs.standard_normal(size=50)

    A, crit, sig = stats.anderson(x1, 'expon')
    assert_array_less(A, crit[-2:])

    # Floating-point warnings are silenced for the normal sample; the
    # context manager restores the previous error state on exit, even
    # if the call raises.
    with np.errstate(all='ignore'):
        A, crit, sig = stats.anderson(x2, 'expon')
    assert_(A > crit[-1])
def test_expon(self):
    """Check ``stats.anderson(..., 'expon')`` on two seeded samples.

    The exponential sample is accepted (statistic below the last two
    critical values); the normal sample is rejected (statistic above the
    largest critical value).
    """
    rs = RandomState(1234567890)
    x1 = rs.standard_exponential(size=50)
    x2 = rs.standard_normal(size=50)

    A, crit, sig = stats.anderson(x1, 'expon')
    assert_array_less(A, crit[-2:])

    # np.errstate replaces the manual seterr/try/finally pair: it
    # suppresses floating-point warnings for this call only and always
    # restores the prior settings.
    with np.errstate(all='ignore'):
        A, crit, sig = stats.anderson(x2, 'expon')
    assert_(A > crit[-1])
def test_iid_unequal_equiv():
    """With one sample and identical seeds, both bootstraps agree.

    ``IIDBootstrap`` and ``IndependentSamplesBootstrap`` must produce the
    same bootstrap variance of the mean.
    """
    x = RandomState(0).standard_normal(500)

    iid = IIDBootstrap(x, random_state=RandomState(0))
    independent = IndependentSamplesBootstrap(x, random_state=RandomState(0))

    iid_var = iid.var(np.mean)
    independent_var = independent.var(np.mean)
    assert_allclose(iid_var, independent_var)
def test_iid_unequal_equiv():
    """With one sample and identical seeds, both bootstraps agree.

    Also checks the layout of ``IndependentSamplesBootstrap.index``: a
    tuple whose first element is a list of index arrays shaped like the
    data.
    """
    x = RandomState(0).standard_normal(500)

    iid = IIDBootstrap(x, random_state=RandomState(0))
    independent = IndependentSamplesBootstrap(x, random_state=RandomState(0))

    iid_var = iid.var(np.mean)
    independent_var = independent.var(np.mean)
    assert_allclose(iid_var, independent_var)

    index = independent.index
    assert isinstance(index, tuple)
    assert isinstance(index[0], list)
    assert isinstance(index[0][0], np.ndarray)
    assert index[0][0].shape == x.shape
def signal(self):
    """
    Deliver the signal.

    Seeded white noise scaled by ``self.rms`` is passed through the
    filter described by ``self.ma`` (numerator) and ``self.ar``
    (denominator), applied as second-order sections.

    Returns
    -------
    Array of floats
        The resulting signal as an array of length
        :attr:`~SignalGenerator.numsamples`.
    """
    generator = RandomState(self.seed)
    numerator = self.handle_empty_coefficients(self.ma)
    denominator = self.handle_empty_coefficients(self.ar)
    sections = tf2sos(numerator, denominator)

    # Create a longer signal to compensate for the filter delay, then
    # drop the leading samples again.
    delay = round(0.5 * (numerator.shape[0] - 1))
    white = self.rms * generator.standard_normal(self.numsamples + delay)
    filtered = sosfilt(sections, x=white)
    return filtered[delay:]
def test_unequal_reset():
    """``reset`` restores the generator state of the bootstrap.

    Covers both a user-supplied ``RandomState`` and a bootstrap that was
    seeded through ``seed`` and reset with ``use_seed=True``.
    """

    def mean_diff(*args):
        first, second = args[0], args[1]
        return first.mean() - second.mean()

    rs = RandomState(0)
    x = rs.standard_normal(800)
    y = rs.standard_normal(200)

    # Case 1: external RandomState; state is captured before bootstrapping.
    initial = rs.get_state()
    bs = IndependentSamplesBootstrap(x, y, random_state=rs)
    assert bs.var(mean_diff) > 0
    bs.reset()
    assert_equal(bs.get_state()[1], initial[1])

    # Case 2: internally seeded generator, reset back to the seed.
    bs = IndependentSamplesBootstrap(x, y)
    bs.seed(0)
    initial = bs.get_state()
    bs.var(mean_diff)
    bs.reset(use_seed=True)
    assert_equal(bs.get_state()[1], initial[1])
def test_unequal_bs_kwargs():
    """Bootstrap two unequal-length samples passed as keyword arguments.

    Exercises ``var``, ``conf_int`` and ``apply`` with NumPy inputs and
    ``var`` again with pandas inputs.
    """

    # Parameter names must stay ``x`` and ``y``: the data are supplied to
    # the bootstrap by keyword.
    def mean_diff(x, y):
        return x.mean() - y.mean()

    rs = RandomState(0)
    x = rs.standard_normal(800)
    y = rs.standard_normal(200)

    bs = IndependentSamplesBootstrap(x=x, y=y, random_state=rs)
    assert bs.var(mean_diff) > 0
    lower, upper = bs.conf_int(mean_diff)[0], bs.conf_int.__self__ is bs and None
    ci = None
    del lower, upper, ci
    ci = bs.conf_int(mean_diff)
    assert ci[0] < ci[1]
    applied = bs.apply(mean_diff, 1000)

    bs = IndependentSamplesBootstrap(
        x=pd.Series(x), y=pd.Series(y), random_state=rs
    )
    assert bs.var(mean_diff) > 0
    assert len(applied) == 1000
def build_displacement(boxsize, ngrid, power_func, seed=12345):
    """
    Build a grid of the displacement field using the given power spectrum

    boxsize    - Size of the box, units (e.g. Mpc/h) must be consistent with
                 the power spectrum function
    ngrid      - Integer size of the grid, e.g. 32 for a 32x32x32 grid
    power_func - Power spectrum function to be used on each k-mode, units
                 should be inverse of the box size (e.g. h/Mpc)
    [seed]     - Optional seed for the random number generator.

    returns
    disp_grid  - (3,ngrid,ngrid,ngrid) array of displacements, with
                 disp_grid[i] giving the displacement along the i-th axis etc.
    """
    mersenne = RandomState(seed)

    # make the k values
    k, kmag, inv_k2 = make_k_values(boxsize, ngrid)
    dk = 2 * pi / boxsize  # Distance between the modes (k)

    # amplitudes of the double integral of density (proportional to potential)
    mode_ampl = sqrt(power_func(kmag.ravel()) * dk * dk * dk)
    mode_ampl.shape = (ngrid, ngrid, ngrid)

    # Complex white noise; the real part is drawn before the imaginary part.
    white_real = mersenne.standard_normal(size=(ngrid, ngrid, ngrid))
    white_imag = mersenne.standard_normal(size=(ngrid, ngrid, ngrid))
    grf_k = (white_real + 1.0j * white_imag) * mode_ampl

    disp_fld = empty((3, ngrid, ngrid, ngrid), dtype=float64)
    # Reshape k so that it broadcasts along axis 0, 1 and 2 in turn.
    k_shapes = ((ngrid, 1, 1), (ngrid, 1), (ngrid, ))
    for axis, k_shape in enumerate(k_shapes):
        ik_axis = 1.0j * reshape(k, k_shape) * inv_k2
        disp_fld[axis] = fftn(grf_k * ik_axis).real

    # Sum the squares over the component axis (axis 0) so that the mean is
    # the mean squared displacement magnitude. Summing over axis 1, a grid
    # axis, reported a value that scaled with ngrid.
    rms_disp = sqrt(square(disp_fld).sum(0).mean(dtype=float64))
    # Single-argument print call: same output on Python 2 and 3.
    print('RMS displacement %s' % (rms_disp,))
    return disp_fld
def speed_test():
    """Time ``z_standard_normal`` against ``RandomState.standard_normal``.

    For each generator, 10,000,000 normals are drawn and the elapsed time,
    sample mean/variance and the occupancy of 1000 equal-probability bins
    are printed.
    """
    from time import time

    import numpy as np
    from scipy.special import ndtri  # inverse cumulative normal

    n_bin = 1000
    # Bin edges at the k/n_bin quantiles of the standard normal.
    bins = ndtri((np.arange(n_bin - 1) + 1) / float(n_bin))

    def report(sample, elapsed):
        # Print timing, moments and bin-occupancy extremes for one sample.
        print('Time to generate {:,} random normals'.format(len(sample)),
              '%.3fs' % elapsed)
        print('Mean', sample.mean(), 'variance', sample.var())
        counts = np.bincount(np.digitize(sample, bins), minlength=n_bin)
        print('Bin counts in', counts.min(), counts.max())
        lowest, highest = np.argmin(counts), np.argmax(counts)
        print('Lowest bin %d in i=%d, max %d in %d' %
              (counts[lowest], lowest, counts[highest], highest))

    random_state = RandomState(seed=123)
    # Small untimed call before the measured run.
    z = z_standard_normal(10)

    started = time()
    z = z_standard_normal(10000000, random_state)
    finished = time()
    report(z, finished - started)

    # The bin counts can be plotted with pylab for a visual check.

    started = time()
    z = random_state.standard_normal(z.size)
    finished = time()
    report(z, finished - started)
def test_unequal_bs():
    """Bootstrap two unequal-length samples passed positionally.

    Exercises ``var``, ``conf_int`` and ``apply`` with NumPy inputs, then
    ``var`` with pandas inputs, and checks that the BCa interval is
    rejected with a ``ValueError``.
    """

    def mean_diff(*args):
        first, second = args[0], args[1]
        return first.mean() - second.mean()

    rs = RandomState(0)
    x = rs.standard_normal(800)
    y = rs.standard_normal(200)

    bs = IndependentSamplesBootstrap(x, y, random_state=rs)
    assert bs.var(mean_diff) > 0
    interval = bs.conf_int(mean_diff)
    assert interval[0] < interval[1]
    assert len(bs.apply(mean_diff, 1000)) == 1000

    bs = IndependentSamplesBootstrap(pd.Series(x), pd.Series(y))
    assert bs.var(mean_diff) > 0
    with pytest.raises(ValueError, match="BCa cannot be applied"):
        bs.conf_int(mean_diff, method="bca")
def test_x_forecasting_simulation_smoke(nexog):
    """Smoke test: simulation-based forecasts with exogenous regressors.

    Only checks that ``forecast(..., method="simulation")`` runs; no
    values are asserted.
    """
    rng = RandomState(12345)
    simulated = arch_model(None, mean="ARX", lags=2).simulate(
        [0.1, 1.2, -0.6, 0.1, 0.1, 0.8], nobs=1000
    )
    series = simulated.data

    names = [f"x{i}" for i in range(1, nexog + 1)]
    x = pd.DataFrame(
        rng.standard_normal((series.shape[0], nexog)),
        columns=names,
        index=series.index,
    )
    if x.shape[1] == 2:
        b = np.array([0.25, 0.5])
    else:
        b = np.array([0.25])
    y = series + x @ b
    y.name = "y"

    res = arch_model(y, x, mean="ARX", lags=2).fit(disp="off")

    # Future regressors: variable i takes the values 100*i .. 100*i + 9.
    x_fcast = np.zeros((x.shape[1], 1, 10))
    for i in range(x_fcast.shape[0]):
        x_fcast[i] = np.arange(100 * i, 100 * i + 10)
    res.forecast(
        x=x_fcast, horizon=10, reindex=False, method="simulation", simulations=10
    )
def make_standard_normal(seed=12345, size=20):
    """Return ``size`` standard-normal draws from a generator seeded with ``seed``."""
    return RandomState(seed).standard_normal(size)
def displacement_boost(orig_boxsize, orig_ngrid, boost_ngrid, nrepeat,
                       power_func, seed=12345, log=null_log):
    """
    Like build_displacement but for a boost grid.

    Build a boost-grid with only high frequency power, to be tiled over the
    box. For example, with a 2048^3 basic grid where the high frequencies of
    a 4096^3 grid are wanted, a 1024^3 grid repeated 4 times in each
    direction (1024*4=4096) can be built, with all modes already present in
    the 2048 grid zeroed out:

        displacement_boost(orig_boxsize, orig_ngrid=2048, boost_ngrid=1024,
                           nrepeat=4, ...)

    orig_boxsize - Size of the original box
    orig_ngrid   - Original ngrid
    boost_ngrid  - ngrid of the boosted grid
    nrepeat      - Number of times the boost grid is repeated in each
                   direction; boost_ngrid*nrepeat must be a multiple of
                   orig_ngrid
    power_func   - Power spectrum function to be used on each k-mode, units
                   should be inverse of the box size (e.g. h/Mpc)
    [seed]       - Optional seed for the random number generator.
    [log]        - File-like object that progress messages are printed to.

    returns
    disp_grid    - float32 displacements as returned by gradient_5pt_3d; the
                   RMS below sums over axis 3, so the component axis appears
                   to be last, i.e. (ngrid,ngrid,ngrid,3).
    """
    effective_res = boost_ngrid * nrepeat
    if effective_res % orig_ngrid != 0:
        message = 'boost_ngrid*nrepeat = {:,} must be a multiple of {:,}, the original grid size'
        raise Exception(message.format(effective_res, orig_ngrid))

    print('- Building boost grid to increase resolution by a factor %d' %
          (effective_res // orig_ngrid), file=log)

    mersenne = RandomState(seed)
    ngrid3 = boost_ngrid**3
    # Size of one tile of the repeated boost grid.
    boxsize = float(orig_boxsize) / nrepeat

    print('- Making k values', file=log)
    dk = 2 * pi / boxsize  # Distance between the modes (k)

    # amplitudes of the double integral of density (proportional to potential)
    print('- Amplitudes of modes', file=log)
    if boost_ngrid < 32:
        # Small grid: evaluate the power spectrum directly on every mode.
        def ampl_func(k):
            # inverse k^2 for potential
            return sqrt(power_func(k) * dk * dk * dk) / (k * k)
    else:
        # Large grid: sample the power spectrum on a 1-d table and
        # interpolate from it.
        num_pts = boost_ngrid * 5
        print('- Large ngrid={:,}^3, sampling power spectrum 5*ngrid={:,} times'
              .format(boost_ngrid, num_pts), file=log)
        kmax = dk * boost_ngrid * sqrt(0.75)
        resample_k_pts = (arange(num_pts) + 1) * (kmax / num_pts)
        ampl = sqrt(power_func(resample_k_pts) * dk * dk * dk)
        print('- Interpolating power spectrum', file=log)

        def ampl_func(k):
            # inverse k^2 for potential
            return interp(k, resample_k_pts, ampl) / (k * k)

    mode_ampl = kfunc_on_grid(boxsize, boost_ngrid, ampl_func, k0_val=0.0,
                              log=log)

    # E.g. for 2048 grid we sample modes up to k=1024 dk. In the nrepeat=4
    # box this corresponds to k=256
    imax = orig_ngrid // (2 * nrepeat)
    print('- Zero-ing modes (8 corners) :{:,} that are sampled by original {:,}^3 grid'
          .format(imax, orig_ngrid), file=log)

    def clip_func(k):
        # 0 for modes the original grid already carries, 1 otherwise.
        return where(k < (imax * dk), 0.0, 1.0)

    mode_ampl *= kfunc_on_grid(boxsize, boost_ngrid, clip_func, log=log)

    print('- Building {:,} random numbers'.format(ngrid3 * 2), file=log)
    # Consecutive pairs of normals are reinterpreted as complex numbers.
    grf_k = mersenne.standard_normal(size=2 * ngrid3).view(dtype=complex128)

    print('- Scaling', file=log)
    grf_k *= mode_ampl.ravel()
    grf_k.shape = (boost_ngrid,) * 3

    print('- {:,}^3 fourier transform to calculate potential'.format(boost_ngrid),
          file=log)
    # 'integral' of displacement field (i.e. proportional to potential,
    # and the displacement is the gradient of this)
    int_disp = fftn(grf_k).real

    print('- 5 point gradient to calculate displacement field', file=log)
    dx = boxsize / boost_ngrid
    disp_fld = gradient_5pt_3d(int_disp * (1.0 / dx)).astype(float32)

    print('- Computing RMS', file=log)
    rms_disp = sqrt(square(disp_fld).sum(axis=3).mean(dtype=float64))
    print('- RMS displacement %.3f kpc/h' % (rms_disp * 1e3, ), file=log)
    return disp_fld
def min_objective(Xu, Xs, W, last_U, last_V, samples):
    """Fit user/item latent factors by stochastic hinge-loss updates.

    For each sampled pair ``(i, j)`` the label is +1 when ``Xu[i, j] > 0``
    and -1 otherwise. Pairs already flagged in the module-level
    ``skipped_elements``, or scored below -1 by the previous factors
    (``last_U``, ``last_V``), are skipped and flagged on return.

    Reads ``dim``, ``nIterations``, ``alpha`` and ``lambd`` from module
    scope. ``Xs`` and ``W`` are accepted for interface compatibility but
    are not used by the active code path.

    Parameters
    ----------
    Xu : 2-D array-like indexed as ``Xu[i, j]`` (users x items).
    Xs, W : unused here.
    last_U, last_V : factors from a previous round, or ``None`` for
        ``last_U`` to disable the score-based skipping.
    samples : sequence of ``(i, j)`` pairs, at least ``nIterations`` long.

    Returns
    -------
    (U, V) : the fitted user and item factor matrices.
    """
    global skipped_elements
    learn_rate = 0.05
    nusers, nitems = Xu.shape
    nz_elements = len(np.nonzero(Xu)[0])
    # print calls use a single pre-formatted string so the output is the
    # same on Python 2 and Python 3.
    print("non zero entries are:  %s" % (nz_elements,))

    rs = RandomState(1234567890)
    U = normalize(rs.standard_normal((nusers, dim)), norm='l2', axis=1)
    V = normalize(rs.standard_normal((nitems, dim)), norm='l2', axis=1)
    # NOTE(review): E is drawn from the global NumPy RNG and never updated
    # below (its update, like the W update, is disabled), so its term in
    # the convergence measure is always zero.
    E = np.random.standard_normal((nitems, dim))
    prev_V = np.array(V)
    prev_U = np.array(U)
    prev_E = np.array(E)

    skipped = 0
    skip_i = []
    skip_j = []
    obj = 10
    for itera in xrange(nIterations):
        i = samples[itera][0]
        j = samples[itera][1]
        y = 1 if Xu[i, j] > 0 else -1

        # `is not None` instead of `!= None`: the latter is an elementwise
        # comparison on ndarrays and cannot be used as a truth value.
        badly_scored = (last_U is not None
                        and y * np.dot(last_U[i, :], last_V[j, :]) < -1)
        if skipped_elements[i, j] == 1 or badly_scored:
            # The periodic convergence check also runs on skipped samples.
            if itera > 1 and itera % 50000 == 0:
                obj = np.sum((U - prev_U)**2) + np.sum(
                    (V - prev_V)**2) + np.sum((E - prev_E)**2)
                print("iterations:  %s  obj:  %s" % (itera, obj))
                prev_V = np.array(V)
                prev_U = np.array(U)
                prev_E = np.array(E)
                if obj < 0.01:
                    break
            skipped += 1
            skip_i.append(i)
            skip_j.append(j)
            continue

        # Hinge loss: the gradient is non-zero only inside the margin.
        t = y * np.dot(U[i, :], V[j, :])
        if 1 - t > 0:
            gradu = -1 * y * V[j, :]
            gradv = -1 * y * U[i, :]
        else:
            gradu = 0
            gradv = 0

        # gradv was computed from the old U[i], so updating U first is safe.
        U[i, :] = U[i, :] - learn_rate * (
            alpha * gradu) - learn_rate * lambd * 2 * U[i, :]
        V[j, :] = V[j, :] - learn_rate * (
            alpha * gradv) - learn_rate * lambd * 2 * V[j, :]

        if itera > 1 and itera % 50000 == 0:
            obj = np.sum((U - prev_U)**2) + np.sum((V - prev_V)**2) + np.sum(
                (E - prev_E)**2)
            print("iterations:  %s  obj:  %s" % (itera, obj))
            prev_V = np.array(V)
            prev_U = np.array(U)
            prev_E = np.array(E)
            if obj < 0.01:
                break

    print("skipped:  %s" % (skipped,))
    skipped_elements[skip_i, skip_j] = 1
    return U, V
def build_displacement(boxsize, ngrid, power_func, seed=12345, log=null_log):
    """
    Build a grid of the displacement field using the given power spectrum

    boxsize    - Size of the box, units (e.g. Mpc/h) must be consistent with
                 the power spectrum function
    ngrid      - Integer size of the grid, e.g. 32 for a 32x32x32 grid
    power_func - Power spectrum function to be used on each k-mode, units
                 should be inverse of the box size (e.g. h/Mpc)
    [seed]     - Optional seed for the random number generator.
    [log]      - File-like object that progress messages are printed to.

    returns
    disp_grid  - (3,ngrid,ngrid,ngrid) float32 array of displacements, with
                 disp_grid[i] giving the displacement along the i-th axis etc.
    """
    mersenne = RandomState(seed)
    ngrid3 = ngrid**3

    print('- Making k values', file=log)
    dk = 2 * pi / boxsize  # Distance between the modes (k)

    # amplitudes of the double integral of density (proportional to potential)
    print('- Amplitudes of modes', file=log)
    if ngrid < 32:
        def ampl_func(k):
            # inverse k^2 for potential
            return sqrt(power_func(k) * dk * dk * dk) / (k * k)
    else:
        # When ngrid is large, the sampling of the power spectrum is very
        # dense (i.e. a 1-d function sampled ngrid**3 times), which can be
        # the most expensive part for complicated functions. Instead, for
        # ngrid>32 we resample the function ngrid*5 times in linear space,
        # and interpolate from this.
        #
        # Note that this guarantees that the nearest grid point is at most
        # 9% away in units of dk or 4% in relative. Tests on maximum
        # interpolation error on power spectrum give ~0.1% for large
        # (>100 Mpc/h) boxes, up to ~0.6% for small (<<100 Mpc/h) ones.
        num_pts = ngrid * 5
        print('- Large ngrid={:,}^3, sampling power spectrum 5*ngrid={:,} times'
              .format(ngrid, num_pts), file=log)
        kmax = dk * ngrid * sqrt(0.75)
        resample_k_pts = (arange(num_pts) + 1) * (kmax / num_pts)
        ampl = sqrt(power_func(resample_k_pts) * dk * dk * dk)
        print('- Interpolating power spectrum', file=log)

        def ampl_func(k):
            # inverse k^2 for potential (no deconvolution of the 5-point
            # differencing is applied here)
            return interp(k, resample_k_pts, ampl) / (k * k)

    mode_ampl = kfunc_on_grid(boxsize, ngrid, ampl_func, k0_val=0.0, log=log)

    print('- Building {:,} random numbers'.format(ngrid3 * 2), file=log)
    # Consecutive pairs of normals are reinterpreted as complex numbers.
    grf_k = mersenne.standard_normal(size=2 * ngrid3).view(dtype=complex128)

    print('- Scaling', file=log)
    grf_k *= mode_ampl.ravel()
    grf_k.shape = (ngrid,) * 3

    print('- {:,}^3 fourier transform to calculate potential'.format(ngrid),
          file=log)
    # 'integral' of displacement field (i.e. proportional to potential,
    # and the displacement is the gradient of this)
    int_disp = fftn(grf_k).real

    print('- 5 point gradient to calculate displacement field', file=log)
    dx = boxsize / ngrid
    gradient = gradient_5pt_3d(int_disp * (1.0 / dx))

    print('- Transposing', file=log)
    # Move the component axis from last to first.
    disp_fld = empty((3, ngrid, ngrid, ngrid), dtype=float32)
    for component in range(3):
        disp_fld[component] = gradient[:, :, :, component]

    rms_disp = sqrt(square(disp_fld).sum(0).mean(dtype=float64))
    print('- RMS displacement %.3f kpc/h' % (rms_disp * 1e3, ), file=log)
    return disp_fld
class PMF(nn.Module):
    """Matrix-factorisation model with listen, sing and feedback scores.

    Embedding tables (all ``n_factors`` wide unless noted):
      U - user latent features for interests
      V - song latent features for interests
      C - user latent features for performance
      R - user latent features for feedback on performance
      D - song latent features for performance
      alpha - per-user mixing weight, shape (n_users, 1), initialised to 0.5

    All tables except ``alpha`` are initialised with 0.01 * N(0, 1) draws
    from a ``RandomState`` seeded with 1, in the order U, V, C, R, D.
    """

    def __init__(self, n_users, n_items, n_factors=30, is_sparse=False, no_cuda=False):
        super(PMF, self).__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors
        self.no_cuda = no_cuda
        self.random_state = RandomState(1)

        # The creation order fixes both the random draws and the parameter
        # registration order; do not reorder.
        self.U = self._normal_embedding(n_users, is_sparse)
        self.V = self._normal_embedding(n_items, is_sparse)
        self.C = self._normal_embedding(n_users, is_sparse)
        self.R = self._normal_embedding(n_users, is_sparse)
        self.D = self._normal_embedding(n_items, is_sparse)

        self.alpha = nn.Embedding(n_users, 1, sparse=is_sparse)
        # The weight must stay 2-D, (n_users, 1), to match the embedding
        # declaration; a 1-D weight breaks the embedding lookup.
        # NOTE(review): checkpoints saved with a 1-D alpha weight would need
        # reshaping before loading - confirm whether any exist.
        self.alpha.weight.data = torch.full((n_users, 1), 0.5, dtype=torch.float32)

    def _normal_embedding(self, n_rows, is_sparse):
        """Create an (n_rows, n_factors) embedding filled with 0.01 * N(0, 1)."""
        table = nn.Embedding(n_rows, self.n_factors, sparse=is_sparse)
        draws = self.random_state.standard_normal(size=(n_rows, self.n_factors))
        table.weight.data = torch.from_numpy(0.01 * draws).float()
        return table

    def forward_listen(self, users_index, items_index):
        """Return the interest score ``sum(U[u] * V[i])`` per pair."""
        u = self.U(users_index)
        v = self.V(items_index)
        return (u * v).sum(1)

    def forward_sing(self, users_index, items_index):
        """Return ``alpha * interest + (1 - alpha) * performance`` per pair."""
        u = self.U(users_index)
        v = self.V(items_index)
        c = self.C(users_index)
        d = self.D(items_index)
        # Drop the trailing singleton axis so alpha is (batch,) and combines
        # elementwise with the (batch,) scores instead of broadcasting to
        # (batch, batch).
        alpha = self.alpha(users_index).squeeze(-1)
        return (u * v).sum(1) * alpha + (c * d).sum(1) * (1 - alpha)

    def forward_feedback(self, users_index, items_index, friends_index, d):
        """Return the feedback score ``sum(R[f] * C[u] * D[i])`` per triple.

        The ``d`` argument is kept for interface compatibility but is
        ignored: the item embedding is always looked up from ``D``.
        """
        r = self.R(friends_index)
        c = self.C(users_index)
        item_perf = self.D(items_index)
        return (r * c * item_perf).sum(1)

    def get_u(self, users_index):
        """Return the interest embeddings of the given users."""
        return self.U(users_index)

    def get_v(self, items_index):
        """Return the interest embeddings of the given items."""
        return self.V(items_index)

    def __call__(self, *args):
        # NOTE(review): this class defines no ``forward``; calling the model
        # directly reaches nn.Module's default. Use the forward_* methods.
        return self.forward(*args)

    def predict_sing(self, users_index, items_index):
        """Return the sing score; thin wrapper around ``forward_sing``."""
        return self.forward_sing(users_index, items_index)