def get_amount(amount_min):
    amount = 0
    while amount < amount_min or amount > amount_max:
        rv = random.uniform(0, 1)
        if rv < 0.4:
            amount = amount_max - int(chi2.rvs(2, loc=50, scale=400)) * 1000
        elif rv < 0.85:
            amount = amount_max - int(chi2.rvs(3, loc=220, scale=435)) * 1000
        else:
            amount = amount_max - int(norm.rvs(loc=595, scale=55)) * 1000
    return amount
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    # simulate the chi2 distribution as a sum of n squared standard normals
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j] ** 2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))
    # simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the F-distribution
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # F = (X/dfn) / (Y/dfm) for independent chi2 variates
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()
def fill(x, y):
    # `degree_of_freedom` and `a_lower` come from the enclosing scope
    if x == y:
        # one chi-squared draw (size=1 returns an array, so [0] extracts the value)
        return chi2.rvs(degree_of_freedom - x - 1, size=1)[0]
    elif x < y:
        return a_lower.pop()
    else:
        return 0
def F_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the F-distribution as a ratio of scaled chi2 variates
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # F = (X/dfn) / (Y/dfm), no square root
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('F_distribution.png')
def sample_from_prior(self, hps, rng):
    # uniform distribution on mu
    comp_k = hps['comp_k']
    mu = np.random.rand(comp_k)
    var = chi2.rvs(self.CHI_VAL, size=comp_k) * hps['var_scale'] + self.EPSILON
    pi = np.random.dirichlet(np.ones(comp_k) * hps['dir_alpha'])
    return mu, var, pi
def wishartrand(nu, phi):
    """Return a Wishart(nu, phi) random matrix via the Bartlett decomposition
    (modified from https://gist.github.com/jfrelinger/2638485)."""
    dim = phi.shape[0]
    chol = cholesky(phi)
    # lower triangle of standard normals; the diagonal is overwritten below
    foo = np.tril(norm.rvs(loc=0, scale=1, size=(dim, dim)))
    # diagonal entries are square roots of chi2 draws with decreasing degrees of freedom
    temp = [np.sqrt(chi2.rvs(nu - (i + 1) + 1)) for i in np.arange(dim)]
    foo[np.diag_indices(dim)] = temp
    return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))
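A quick sanity check, not from the original gist: the mean of a Wishart(nu, phi) matrix is nu * phi, so averaging many draws from `wishartrand` should approach it. The 2x2 `phi` and `nu` below are made-up values, and the imports the function itself relies on (`numpy`, plus `cholesky`, `norm`, `chi2`) are assumed to be in scope.

import numpy as np

phi = np.array([[2.0, 0.5], [0.5, 1.0]])
nu = 10
avg = np.mean([wishartrand(nu, phi) for _ in range(2000)], axis=0)
print(avg)       # should be close to nu * phi
print(nu * phi)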
def SimulateUniforms(self):
    """Simulate uniform marginals from a Student-t copula."""
    mean = [0.0] * self.size
    cov = self.copula_covariance
    s = chi2.rvs(self.dof)
    Z = multivariate_normal(mean, cov)  # one multivariate normal draw
    X = [math.sqrt(self.dof) / math.sqrt(s) * z for z in Z]
    Y = [t.cdf(x, self.dof) for x in X]
    return Y
def get_nbh_sizes(n, p):
    eta = chi2.ppf(0.95, p)
    c1 = (4 * np.pi) ** (-p * 1. / 2) * chi2.cdf(2 * eta, p)
    c21 = 1. / (np.pi * (p + 2)) ** 2 * (p * 1. / 2 * gammafun(p * 1. / 2)) ** (4. / p)
    N = 1000000
    chi2_rnd = chi2.rvs(p, size=N)
    c22 = (2 * np.pi) ** (-1. / 2 + 2. / p) * np.mean(
        (chi2_rnd <= eta) * (chi2_rnd - p) ** 2 * np.exp(-(1. / 2 - 2. / p) * chi2_rnd))
    c2 = c21 * c22
    k1 = int(np.ceil(n ** (4. / (p + 4)) * (p * c1 / c2) ** (p * 1. / (p + 4))))
    k2 = int(np.ceil(p * 1. / 5 * k1 * np.sqrt(np.log(np.log(k1)))))
    return k1, k2
def get_amount(amount_min):
    amount = 0
    while amount < amount_min or amount > amount_max:
        rv = random.uniform(0, 1)
        if rv < 0.3:
            amount = amount_max - int(norm.rvs(loc=120, scale=30)) * 1000
        elif rv < 0.55:
            amount = amount_max - int(chi2.rvs(5, loc=380, scale=145)) * 1000
        else:
            amount = amount_max - int(norm.rvs(loc=660, scale=145)) * 1000
    return amount
def wishartrand(self):
    dim = self.inv_psi.shape[0]
    foo = np.zeros((dim, dim))
    for i in range(dim):
        for j in range(i + 1):
            if i == j:
                foo[i, j] = np.sqrt(chi2.rvs(self.nu - (i + 1) + 1))
            else:
                foo[i, j] = np.random.normal(0, 1)
    return np.dot(self.cholesky, np.dot(foo, np.dot(foo.T, self.cholesky.T)))
def sample_invwishart(S, nu):
    n = S.shape[0]
    chol = np.linalg.cholesky(S)
    if (nu <= 81 + n) and (nu == np.round(nu)):
        x = npr.randn(nu, n)
    else:
        x = np.diag(np.sqrt(np.atleast_1d(chi2.rvs(nu - np.arange(n)))))
        x[np.triu_indices_from(x, 1)] = npr.randn(n * (n - 1) // 2)
    R = np.linalg.qr(x, 'r')
    T = solve_triangular(R.T, chol.T, lower=True).T
    return np.dot(T, T.T)
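A minimal usage sketch, assuming `numpy.random` is imported as `npr` and `solve_triangular` comes from `scipy.linalg`, as the function requires; the scale matrix and degrees of freedom below are illustrative only.

import numpy as np

S = np.eye(3)
draw = sample_invwishart(S, nu=7)
print(np.allclose(draw, draw.T))             # draws are symmetric
print(np.all(np.linalg.eigvalsh(draw) > 0))  # and positive definite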
def noisy_power_spectrum(S):
    """
    Create a noisy power spectrum, given some input power spectrum S,
    following the recipe of Vaughan (2010), MNRAS, 402, 307, appendix B.

    Parameters
    ----------
    S : numpy array
        Theoretical Fourier power spectrum, from the first non-zero
        frequency up to the Nyquist frequency.
    """
    # Number of positive frequencies to calculate.
    K = len(S)
    # chi-squared(2) random numbers for all frequencies except the Nyquist
    # frequency, which gets a chi-squared(1) draw
    X = np.concatenate((chi2.rvs(2, size=K - 1), chi2.rvs(1, size=1)))
    # power spectrum
    return S * X / 2.0
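A minimal usage sketch for the function above; the frequency grid and the f^-2 power-law spectrum are assumptions for illustration, not from Vaughan (2010).

import numpy as np

freqs = np.arange(1, 513) / 1024.0  # positive frequencies up to the Nyquist
S = freqs ** -2.0                   # an assumed f^-2 power-law spectrum
noisy = noisy_power_spectrum(S)     # each bin ~ S * chi2(2) / 2, chi2(1) at the Nyquist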
def wishartrand(nu, phi):
    dim = phi.shape[0]
    chol = cholesky(phi)
    foo = np.zeros((dim, dim))
    for i in range(dim):
        for j in range(i + 1):
            if i == j:
                foo[i, j] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
            else:
                foo[i, j] = np.random.normal(0, 1)
    return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))
def sample_post(hp, ss):
    """
    Draw samples from the marginal posteriors of mu and sigmasq

    Murphy, Eqs. 156 & 167
    """
    z = _intermediates(hp, ss)
    # Sample from the inverse-chi^2 using the transform from the chi^2
    sigmasq_star = z.nu * z.sigmasq / chi2.rvs(z.nu)
    mu_star = norm.rvs(z.mu, sqrt(sigmasq_star / z.kappa))
    return (mu_star, sigmasq_star)
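A standalone sketch of the transform used above, with made-up `nu` and `sigmasq`: if X ~ chi2(nu), then nu * sigmasq / X follows a scaled inverse-chi-squared(nu, sigmasq) distribution, whose mean is nu * sigmasq / (nu - 2) for nu > 2.

import numpy as np
from scipy.stats import chi2

nu, sigmasq = 8, 1.5
draws = nu * sigmasq / chi2.rvs(nu, size=200000)
print(np.mean(draws), nu * sigmasq / (nu - 2))  # the two should be close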
def wishartrand_prec(nu, inv_phi):
    dim = inv_phi.shape[0]
    chol = cholesky(inv_phi)
    foo = np.zeros((dim, dim))
    for i in range(dim):
        for j in range(i):
            foo[i, j] = npr.normal(0, 1)
        foo[i, i] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
    return np.dot(chol, np.dot(foo, np.dot(foo.T, chol.T)))
def multivariate_t(nu, mu, sigma):
    """
    Draw a sample from the multivariate t distribution with nu degrees of
    freedom.

    Keyword parameters:
        nu -- degrees of freedom
        mu -- location
        sigma -- scale
    """
    d = len(mu)
    Y = multivariate_normal.rvs(mean=np.zeros(d), cov=sigma)
    U = chi2.rvs(nu)
    return mu + Y * np.sqrt(nu / U)
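A usage sketch with illustrative values (2 dimensions, 5 degrees of freedom, identity scale); it assumes `multivariate_normal` and `chi2` from `scipy.stats` are in scope, as the function body requires.

import numpy as np

draw = multivariate_t(5, np.array([0.0, 0.0]), np.eye(2))
print(draw)  # a length-2 sample vector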
def sample_param(self, hps):
    """ draw a sample """
    sigmasq = hps['nu'] * hps['sigmasq'] / chi2.rvs(hps['nu'])
    std = np.sqrt(sigmasq / hps['kappa'])
    mu = np.random.normal(hps['mu'], std)
    return {'sigmasq': sigmasq, 'mu': mu}
def sample_wishart_v2(nu, Lambda):
    """
    From Sawyer, et al., 'Wishart Distributions and Inverse-Wishart Sampling'.
    Runs in constant time.
    Untested.
    """
    d = Lambda.shape[0]
    ch = cholesky(Lambda)
    T = numpy.zeros((d, d))
    for i in range(d):
        if i != 0:
            T[i, :i] = numpy.random.normal(size=(i,))
        # Bartlett decomposition: with 0-indexed i the diagonal entry is the
        # square root of a chi2 draw with nu - i degrees of freedom
        T[i, i] = sqrt(chi2.rvs(nu - i))
    return dot(dot(dot(ch, T), T.T), ch.T)
def wishartrand(nu, phi):
    dim = phi.shape[0]
    chol = cholesky(phi)
    foo = np.zeros((dim, dim))
    for i in range(dim):
        for j in range(i + 1):
            if i == j:
                foo[i, j] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
            else:
                foo[i, j] = npr.normal(0, 1)
    return dot(chol, dot(foo, dot(foo.T, chol.T)))
def wishart_rnd(nu, S, chol=None):
    dim = S.shape[0]
    if chol is None:
        chol = np.linalg.cholesky(S)
    a = np.zeros((dim, dim))
    for i in range(dim):
        for j in range(i + 1):
            if i == j:
                a[i, j] = np.sqrt(chi2.rvs(nu - (i + 1) + 1))
            else:
                a[i, j] = np.random.normal(0, 1)
    return np.dot(chol, np.dot(a, np.dot(a.T, chol.T)))
def t_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))
    # simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('t_distribution.png')
def fit(self, X, y):
    X_X = X.T.dot(X)
    # Least squares approximation of beta
    beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y)
    # The posterior parameters can be determined analytically since we assume
    # conjugate priors for the likelihoods.
    # Normal prior / likelihood => Normal posterior
    mu_n = np.linalg.pinv(X_X + self.omega0).dot(X_X.dot(beta_hat) + self.omega0.dot(self.mu0))
    omega_n = X_X + self.omega0
    # Scaled inverse chi-squared prior / likelihood => scaled inverse
    # chi-squared posterior
    nu_n = self.nu0 + np.shape(X)[0]
    sigma_sq_n = (1.0 / nu_n) * (self.nu0 * self.sigma_sq0 +
                                 (y.T.dot(y) + self.mu0.T.dot(self.omega0).dot(self.mu0) -
                                  mu_n.T.dot(omega_n.dot(mu_n))))
    # Simulate parameter values for n_iterations
    beta_draws = np.empty((self.n_iterations, np.shape(X)[1]))
    for i in range(self.n_iterations):
        # A chi-squared draw gives a sample from the scaled inverse
        # chi-squared distribution
        chi2_draw = chi2.rvs(size=1, df=nu_n)
        sigma_sq = nu_n * sigma_sq_n / chi2_draw
        beta = multivariate_normal.rvs(size=1, mean=mu_n[:, 0],
                                       cov=sigma_sq * np.linalg.pinv(omega_n))
        beta_draws[i, :] = beta
    self.w = np.mean(beta_draws, axis=0)
def sample(self, eta, size=1):
    """
    A sample of sufficient statistics given the natural parameters, eta.

    This uses the method detailed by `Smith & Hocking
    <http://en.wikipedia.org/wiki/Wishart_distribution#Drawing_values_from_the_distribution>`_.
    """
    from scipy.stats import norm, chi2
    X = empty((size, self.dimension), float64)
    n, V = self.theta(eta)
    L = cholesky(V)
    std_norm = norm(0, 1)
    for sample_idx in range(size):
        # avoid singular matrices by resampling until the determinant is != 0.0
        while True:
            A = zeros((self.p, self.p), dtype=float64)
            for i in range(self.p):
                A[i, :i] = std_norm.rvs(size=i)
                A[i, i] = sqrt(chi2.rvs(n - i))
            if det(A) != 0.0:
                break
        X[sample_idx] = self.T(dot(L, dot(A, dot(A.T, L.T))))
    return X
def rchisquare_inv_scaled2(df, scale, size=None):
    x = chi2.rvs(df, size=size)
    if size is None:
        return (1 / x) * df * scale
    else:
        return [(1 / e) * df * scale for e in x]
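A hedged sanity check with assumed parameters: for df > 2 the scaled inverse-chi-squared mean is df * scale / (df - 2), so the empirical mean of many draws should approach it.

import numpy as np

draws = rchisquare_inv_scaled2(10, 2.0, size=100000)
print(np.mean(draws), 10 * 2.0 / (10 - 2))  # both should be near 2.5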
from scipy.stats import chi2
import numpy as np
import matplotlib.pyplot as plt

a = chi2.rvs(0.5, size=100)
print(float(np.mean(a)))
print(np.std(a))

plt.hist(a, bins=10, density=True, color='green')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()
pl.rc('font', size=14)
fig = pl.figure(figsize=(6, 5), dpi=100)

nud = 24.                   # number of degrees of freedom of data
sigmad = np.sqrt(2. * nud)  # standard deviation of data
N = 15                      # number of samples
nu = N - 1                  # number of degrees of freedom
sigma = 2.                  # standard deviation on measurements

# create a short frequency series power spectrum using a chi-squared distribution
if os.path.isfile('chisquared_data.txt'):
    # use previously created data if it exists
    data = np.loadtxt('chisquared_data.txt')
else:
    data = chi2.rvs(nud, size=N)
    np.savetxt('chisquared_data.txt', data, '%.5f')

# add a small spike into one frequency bin
data[7] = 55.

# plot spectrum
pl.errorbar(range(1, N + 1), data, yerr=sigmad, fmt='o')
ax = pl.gca()
ax.set_xlabel('Frequency bin', fontsize=14)
ax.set_ylabel('Power spectral density', fontsize=14)
ax.text(8.9, 60., r'$\hat{\mu} = \frac{1}{15}\sum x_i = %.1f$' % np.mean(data),
        fontsize=18, bbox={'facecolor': 'none', 'pad': 14, 'ec': 'r'})
fig.subplots_adjust(bottom=0.15)
pl.savefig('../chisquared_data.pdf')
pl.show()
def sample_var(var_bar, ν, err, N):
    return (ν * var_bar + err) / chi2.rvs(ν + N)
def chi2_bids(n):
    return chi2.rvs(4, size=n)
# Copyright: (c) Jean 2019
# Licence: <your licence>
#-------------------------------------------------------------------------------
import numpy as np
from scipy.stats import chi2
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
df = 55
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

# Display the probability density function (pdf)
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

# Freeze the distribution and display the frozen pdf
rv = chi2(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of cdf and ppf
vals = chi2.ppf([0.001, 0.5, 0.999], df)
np.allclose([0.001, 0.5, 0.999], chi2.cdf(vals, df))

# Generate random numbers:
r = chi2.rvs(df, size=1000)

# And compare the histogram
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def _draw_scaled_inv_chi_sq(self, n, df, scale):
    X = chi2.rvs(size=n, df=df)
    sigma_sq = df * scale / X
    return sigma_sq
import numpy as np
from scipy.stats import chi2
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
df = 2
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

# Display the probability density function (pdf):
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

# Alternatively, the distribution object can be called (as a function) to fix
# the shape, location and scale parameters. This returns a "frozen" RV object
# holding the given parameters fixed.
# Freeze the distribution and display the frozen pdf:
rv = chi2(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

vals = chi2.ppf([0.001, 0.5, 0.999], df)
print(np.allclose([0.001, 0.5, 0.999], chi2.cdf(vals, df)))

# Generate random numbers:
r = chi2.rvs(df, size=10000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def sumstat_model(theta, sed=None, dem='slab_calzetti', f_downsample=1.,
                  statistic='2d', noise=True, seed=None,
                  return_datavector=False, sfr0_prescription='adhoc'):
    ''' calculate summary statistics for forward model m(theta)

    :param theta: array of input parameters
    :param sed: dictionary with SEDs of **central** galaxies
    :param dem: string specifying the dust empirical model
    :param f_downsample: if f_downsample > 1., then the SED dictionary is
        downsampled.
    :param sfr0_prescription: prescription for dealing with SFR=0 galaxies

    notes
    -----
    * 09/22/2020: simple noise model implemented
    * 4/22/2020: extra_data kwarg added. This is to pass pre-sampled
      observables for SFR = 0 galaxies
    '''
    # don't touch these values! they are set to agree with the binning of the
    # observables
    nbins = [8, 400, 200]
    ranges = [(20, 24), (-5., 20.), (-5, 45.)]
    dRmag = 0.5
    dGR = 0.0625
    dfuvnuv = 0.25

    # SFR=0 galaxies
    sfr0 = (sed['logsfr.inst'] == -999)
    if sfr0_prescription == 'adhoc':
        raise ValueError
        #R_mag_sfr0, G_R_sfr0, FUV_NUV_sfr0 = _observable_zeroSFR(
        #        sed['wave'],
        #        sed['sed_noneb'][sfr0,:])
    elif sfr0_prescription == 'sfrmin':
        logsfr_min = sed['logsfr.inst'][~sfr0].min()  # minimum SFR
        print(logsfr_min)
        sed['logsfr.inst'][sfr0] = logsfr_min
    else:
        raise NotImplementedError

    sed_dusty = dustFM.Attenuate(theta, sed['wave'], sed['sed_noneb'],
                                 sed['sed_onlyneb'], sed['logmstar'],
                                 sed['logsfr.inst'], dem=dem)

    # observational measurements
    F_mag = measureObs.AbsMag_sed(sed['wave'], sed_dusty, band='galex_fuv')
    N_mag = measureObs.AbsMag_sed(sed['wave'], sed_dusty, band='galex_nuv')
    G_mag = measureObs.AbsMag_sed(sed['wave'], sed_dusty, band='g_sdss')
    R_mag = measureObs.AbsMag_sed(sed['wave'], sed_dusty, band='r_sdss')

    # apply FUV and NUV cut
    uv_cut = (F_mag < -13.5) & (N_mag < -14)
    F_mag = F_mag[uv_cut]
    N_mag = N_mag[uv_cut]
    G_mag = G_mag[uv_cut]
    R_mag = R_mag[uv_cut]

    # calculate color
    FUV_NUV = F_mag - N_mag
    G_R = G_mag - R_mag

    if sfr0_prescription == 'adhoc':
        # append sampled SFR=0 observables to data vector
        R_mag = np.concatenate([R_mag, R_mag_sfr0])
        G_R = np.concatenate([G_R, G_R_sfr0])
        FUV_NUV = np.concatenate([FUV_NUV, FUV_NUV_sfr0])

    n_gal = len(R_mag)

    if noise:
        if seed is not None:
            np.random.seed(seed)
        # noise model (simplest model)
        sig_R = chi2.rvs(3, loc=0.02, scale=0.00003, size=n_gal)
        sig_FN = chi2.rvs(2, loc=0.05, scale=0.05, size=n_gal)
        sig_GR = chi2.rvs(3, size=n_gal) * (0.00001 * (R_mag + 20.1) + 0.00005) \
            + (0.000025 * (R_mag + 20.1) + 0.02835)

        R_mag += np.random.normal(size=n_gal) * sig_R
        FUV_NUV += np.random.normal(size=n_gal) * sig_FN
        G_R += np.random.normal(size=n_gal) * sig_GR

    data_vector = np.array([-1. * R_mag, G_R, FUV_NUV]).T

    if return_datavector:
        return data_vector.T, uv_cut

    Nbins, _ = np.histogramdd(data_vector, bins=nbins, range=ranges)

    # volume of simulation
    vol = {'simba': 100.**3, 'tng': 75.**3, 'eagle': 67.77**3}[sed['sim']]

    x_model = Nbins.astype(float) / vol / dRmag / dGR / dfuvnuv / f_downsample
    nbar = dRmag * dGR * dfuvnuv * np.sum(x_model)

    if statistic == '3d':
        return [nbar, x_model]
    elif statistic == '2d':
        x_r_gr = dfuvnuv * np.sum(x_model, axis=2)
        x_r_fn = dGR * np.sum(x_model, axis=1)
        return [nbar, x_r_gr, x_r_fn]
    elif statistic == '1d':
        x_gr = dRmag * np.sum(dfuvnuv * np.sum(x_model, axis=2), axis=0)
        x_fn = dRmag * np.sum(dGR * np.sum(x_model, axis=1), axis=0)
        return [nbar, x_gr, x_fn]
n = 10000

For this simulation, use `numpy`'s `zeros()` function to prepare an `array` that stores the error-term values generated in the `for` loop. `zeros()` creates an `array` filled with `0`s, and on each pass through the loop an error-term value is assigned in place of a `0`. Set up an `array` with 1 row and `n` columns.

u = np.zeros(n)
u

The standard deviation of the normal distribution:

u_sd = 0.5

for i in range(n):  # (1)
    prob = 0.05  # (2)
    dist_1 = norm.rvs(loc=0, scale=u_sd, size=1)  # (3)
    dist_2 = (chi2.rvs(1, size=1) - 1) / np.sqrt(2)  # (4)
    error = prob * dist_1 + (1 - prob) * dist_2  # (5)
    u[i] = error  # (6)

(Explanation of the code above)

1. Declare a `for` loop that runs `n` times.
2. The weight of `dist_1`.
3. Generate one random variable following a normal distribution.
4. Generate one random variable following a chi-squared distribution.
5. `error` is the error term.
6. Assign this error term to the `i`-th element of `u[]`.

Check the first 10 values of `u`.
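A minimal way to run that check; the `.mean()`/`.std()` line is an extra illustration, not part of the original notebook.

u[:10]             # first 10 simulated error terms
u.mean(), u.std()  # the mean should be close to 0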
def sample_chi2(nu):
    return chi2.rvs(nu)
def sample_mean(n):
    smeans = []
    for _ in range(1000):
        x = chi2.rvs(k, size=n)  # `k` is the module-level degrees of freedom
        smeans.append(x.mean())
    return smeans
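A usage sketch, assuming the module-level `k` (chi-squared degrees of freedom) that `sample_mean` references: by the central limit theorem the sample means concentrate around `k` as `n` grows.

means = sample_mean(100)
print(sum(means) / len(means))  # should be close to k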
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

## Set seed for reproducibility
np.random.seed(101)

## Generate data
X = chi2.rvs(4, size=50)

## hist1; default bin count (10)
plt.figure()
plt.hist(X)
plt.xlabel("X")
plt.ylabel("Count")
plt.savefig("../../fig/05_hist1.png")
plt.close()

## hist2; more bins (20)
plt.figure()
plt.hist(X, bins=20)
plt.xlabel("X")
plt.ylabel("Count")
plt.savefig("../../fig/05_hist2.png")
plt.close()
n = 5
p = .4
mu = .6
df = 55

# binomial
binoarr = binom.rvs(n, p, loc=0, size=nevt, random_state=None)
# poisson
poisarr = poisson.rvs(mu, loc=0, size=nevt, random_state=None)
# gaussian
gausarr = norm.rvs(loc=0, size=nevt, random_state=None)
# chi2
chi2arr = chi2.rvs(df, loc=0, size=nevt, random_state=None)

ichoose = 0
if ichoose == 0:
    fig, ax = plt.subplots(2, 2)
    # print(zarr1)
    ax[0, 0].hist(binoarr, bins=20, range=(0, 10), histtype='step', color='b')
    ax[0, 0].set(title='binomial', xlabel='x', ylabel='number')
    ax[0, 1].hist(poisarr, bins=20, range=(0, 10), histtype='step', color='b')
    ax[0, 1].set(title='poisson', xlabel='x', ylabel='number')
    ax[1, 0].hist(gausarr, bins=20, range=(0, 10), histtype='step', color='b')
    ax[1, 0].set(title='gaussian', xlabel='x', ylabel='number')
    y = np.random.normal(0, 1, s_size)
    samples.append([y, i, "normal"])

for i in range(n_of_samples, 2 * n_of_samples):
    y = bernoulli.rvs(p, size=s_size)
    samples.append([y, i, "bernoulli"])

for i in range(2 * n_of_samples, 3 * n_of_samples):
    y = binom.rvs(n, p, size=s_size)
    samples.append([y, i, "binomial"])

for i in range(3 * n_of_samples, 4 * n_of_samples):
    y = geom.rvs(p, size=s_size)
    samples.append([y, i, "geometric"])

for i in range(4 * n_of_samples, 5 * n_of_samples):
    y = poisson.rvs(n, size=s_size)
    samples.append([y, i, "poisson"])

outlier_1 = beta.rvs(1, 10, size=1000)
outlier_2 = chi2.rvs(n, size=1000)
samples.append([outlier_1, 5 * n_of_samples, "beta"])
samples.append([outlier_2, 5 * n_of_samples + 1, "chi_square"])

for i in range(len(samples)):
    for j in range(i, len(samples)):
        ks_test_pvalue = ks_2samp(samples[i][0], samples[j][0])[1]
        epps_singleton_pvalue = epps_singleton_2samp(samples[i][0], samples[j][0])[1]
        if ks_test_pvalue > 0.05:
            G.add_edge(i, j, weight=0.01 / ks_test_pvalue)  # 0.01 scaling factor here
        if epps_singleton_pvalue > 0.05:
            H.add_edge(i, j, weight=0.01 / epps_singleton_pvalue)  # 0.01 scaling factor here
def mdp_glucose_mHealth_rollout(tuning_function_parameter, mdp_epsilon_policy,
                                time_horizon, x_initials, sx_initials,
                                tuning_function, env, gamma,
                                number_of_value_iterations, mc_replicates):
    X, Sp1 = env.get_state_transitions_as_x_y_pair()
    n, X_dim = X.shape
    # S_dim = Sp1.shape[1]
    regr = condition_dist_of_next_state2(X, Sp1)
    beta_hat = regr['beta_hat']
    sigma_hat = regr['sigma_hat']
    mu_food = regr['mu_food']
    sigma_food = regr['sigma_food']
    prob_food = regr['prob_food']
    mu_activity = regr['mu_activity']
    sigma_activity = regr['sigma_activity']
    prob_activity = regr['prob_activity']
    sampling_cov = regr['sampling_cov']
    sampling_sigma_food = regr['sampling_sigma_food']
    sampling_sigma_activity = regr['sampling_sigma_activity']
    sampling_prob_food = regr['sampling_prob_food']
    sampling_prob_activity = regr['sampling_prob_activity']
    n_effect_activity = regr['n_effect_activity']
    n_effect_food = regr['n_effect_food']

    mean_cumulative_reward = 0
    for rep in range(mc_replicates):
        # check again the following sample sigma's
        # Sample beta_hat and Sigma_hat from their corresponding sampling
        # distributions.
        sample_beta_hat = np.random.multivariate_normal(beta_hat, sampling_cov)
        sample_sigma_hat = np.sqrt(
            chi2.rvs(df=n - X_dim, size=1) / (n - X_dim)) * sigma_hat
        sample_prob_food = np.random.normal(prob_food, sampling_prob_food)
        sample_mu_food = np.random.normal(mu_food, sampling_sigma_food)
        sample_sigma_food = np.sqrt(
            chi2.rvs(df=n_effect_food - 1, size=1) / (n_effect_food - 1)) * sigma_food
        sample_prob_activity = np.random.normal(prob_activity, sampling_prob_activity)
        sample_mu_activity = np.random.normal(mu_activity, sampling_sigma_activity)
        sample_sigma_activity = np.sqrt(
            chi2.rvs(df=n_effect_activity - 1, size=1) / (n_effect_activity - 1)) * sigma_activity

        # check again the x_initials and sx_initials
        sim_env = Glucose(env.nPatients, COEF=sample_beta_hat,
                          SIGMA_NOISE=sample_sigma_hat,
                          prob_food=sample_prob_food,
                          MU_FOOD=sample_mu_food,
                          SIGMA_FOOD=sample_sigma_food,
                          prob_activity=sample_prob_activity,
                          MU_ACTIVITY=sample_mu_activity,
                          SIGMA_ACTIVITY=sample_sigma_activity,
                          x_initials=x_initials, sx_initials=sx_initials)
        sim_env.reset()
        r = 0
        for t in range(time_horizon):
            # if t == 30:
            #     print(t, sim_env.current_state, sample_beta_hat)
            optimal_actions = fitted_q_step1_mHealth_vanilla(sim_env)
            # optimal_actions = fitted_q_step1_mHealth(sim_env, gamma,
            #     RandomForestRegressor, number_of_value_iterations)
            actions = mdp_epsilon_policy(optimal_actions, tuning_function,
                                         tuning_function_parameter,
                                         time_horizon, t)
            # print(t, action, sim_env.current_state[0])
            _, reward = sim_env.step(actions)
            r += reward
        mean_cumulative_reward += (r - mean_cumulative_reward) / (rep + 1)
    return mean_cumulative_reward
def _chdir(data, sampleclass, genes, gamma=1., sort=True, calculate_sig=False,
           nnull=10, sig_only=False, norm_vector=True):
    """
    Repurposed from
    https://github.com/MaayanLab/geode/blob/master/geode/geode.py#L10-L115

    Calculate the characteristic direction for a gene expression dataset.

    Input:
        data: numpy.array, the data matrix of gene expression where rows
            correspond to genes and columns correspond to samples
        sampleclass: list or numpy.array, labels of the samples; it has to
            consist of 0, 1 and 2 only, with 0 being columns to be excluded,
            1 being control and 2 being perturbation
            example: sampleclass = [1,1,1,2,2,2]
        genes: list or numpy.array, row labels for genes
        gamma: float, regularization term. A parameter that smooths the
            covariance matrix and reduces potential noise in the dataset
        sort: bool, whether to sort the output by the absolute value of chdir
        calculate_sig: bool, whether to calculate the significance of the
            characteristic directions
        nnull: int, number of null characteristic directions to calculate for
            significance
        sig_only: bool, whether to return only significant genes; active only
            when calculate_sig is True
        norm_vector: bool, whether to return a characteristic direction vector
            normalized to a unit vector

    Output:
        A list of tuples sorted by the absolute value in descending order of
        the characteristic directions of genes. If calculate_sig is set to
        True, each tuple contains a third element, which is the ratio of the
        characteristic directions to the null ChDir.
    """
    ## check input
    data.astype(float)
    sampleclass = np.array(list(map(int, sampleclass)))
    # masks
    m_non0 = sampleclass != 0
    m1 = sampleclass[m_non0] == 1
    m2 = sampleclass[m_non0] == 2

    if type(gamma) not in [float, int]:
        raise ValueError("gamma has to be a numeric number")
    if set(sampleclass) != set([1, 2]) and set(sampleclass) != set([0, 1, 2]):
        raise ValueError("sampleclass has to be a list whose elements are in only 0, 1 or 2")
    # if m1.sum()<2 or m2.sum()<2:
    #     raise ValueError("Too few samples to calculate characteristic directions")
    if len(genes) != data.shape[0]:
        raise ValueError("Number of genes does not match the dimension of the expression matrix")

    ## normalize data
    data = data[:, m_non0]
    data = zscore(data)  # standardize each gene across samples

    ## start to compute
    n1 = m1.sum()  # number of controls
    n2 = m2.sum()  # number of experiments

    ## the difference between the experiment mean vector and the control mean vector
    meanvec = data[:, m2].mean(axis=1) - data[:, m1].mean(axis=1)

    ## initialize the pca object
    pca = PCA(n_components=None)
    pca.fit(data.T)

    ## compute the number of PCs to keep
    cumsum = pca.explained_variance_ratio_  # explained variance of each PC
    keepPC = len(cumsum[cumsum > 0.001])    # number of PCs to keep

    v = pca.components_[0:keepPC].T         # rotated data
    r = pca.transform(data.T)[:, 0:keepPC]  # transformed data

    dd = (np.dot(r[m1].T, r[m1]) + np.dot(r[m2].T, r[m2])) / float(n1 + n2 - 2)  # covariance
    sigma = np.mean(np.diag(dd))  # the scalar covariance

    shrunkMats = np.linalg.inv(gamma * dd + sigma * (1 - gamma) * np.eye(keepPC))

    b = np.dot(v, np.dot(np.dot(v.T, meanvec), shrunkMats))

    if norm_vector:
        b /= np.linalg.norm(b)  # normalize b to a unit vector

    grouped = zip([abs(item) for item in b], b, genes)
    if sort:
        grouped = sorted(grouped, key=lambda x: x[0], reverse=True)

    if not calculate_sig:
        # return sorted b and genes
        res = [(item[1], item[2]) for item in grouped]
        return res
    else:
        # generate a null distribution of chdirs
        nu = n1 + n2 - 2
        y1 = np.random.multivariate_normal(np.zeros(keepPC), dd, nnull).T \
            * np.sqrt(nu / chi2.rvs(nu, size=nnull))
        y2 = np.random.multivariate_normal(np.zeros(keepPC), dd, nnull).T \
            * np.sqrt(nu / chi2.rvs(nu, size=nnull))
        y = y2 - y1  # y is the null of v

        nullchdirs = []
        for col in y.T:
            bn = np.dot(np.dot(np.dot(v, shrunkMats), v.T), np.dot(col, v.T))
            bn /= np.linalg.norm(bn)
            bn = bn ** 2
            bn.sort()
            bn = bn[::-1]  # sort in descending order
            nullchdirs.append(bn)

        nullchdirs = np.array(nullchdirs).T
        nullchdirs = nullchdirs.mean(axis=1)

        b_s = b ** 2
        b_s.sort()
        b_s = b_s[::-1]  # sorted b in descending order
        relerr = b_s / nullchdirs  # relative error
        # ratio_to_null
        ratios = np.cumsum(relerr) / np.sum(relerr) \
            - np.linspace(1. / len(meanvec), 1, len(meanvec))
        res = [(item[1], item[2], ratio) for item, ratio in zip(grouped, ratios)]
        # print('Number of significant genes: %s' % (np.argmax(ratios) + 1))
        if sig_only:
            return res[0:np.argmax(ratios) + 1]
        else:
            return res
Mca = 40 * amu
Nion = 25
lmd = 396.908e-9
tin = 4
i = 0
a = math.sqrt(k * tin / Mca)
VCAI = np.zeros((Nion, 4))  # 4th dimension = state marker
I = np.zeros(Nion)
Pabs = 0.5
Pemm = 1.0 / 7.0  # ensure at least one of the numbers is a float
timeint = 10
totaltime = 20000

for d in range(0, 3):
    for p in range(0, Nion):
        # speed from sqrt(kT/M) scaled by a chi-squared(3) draw
        x = math.sqrt(((k * tin) / Mca) * chi2.rvs(3))
        x = Mca * x
        VCAI[p, d] = x

VCAIinit = VCAI  # note: an alias of VCAI, not a copy

VCAID = []
for b in range(0, int(Nion)):
    VCAID.insert(b, [float(VCAI[b, 0]), float(VCAI[b, 1]),
                     float(VCAI[b, 2]), float(VCAI[b, 3])])

counterp = 1
counteri = 1
countdown = 0
for time in range(0, (totaltime + timeint), timeint):
    countdown1 = (totaltime - time + 1) / (10 * timeint)
        else:
            # change according to the previous guess
            guess = ((ai_pred[l] - price_last) * (price - price_last) >= 0)
            if not guess:
                ai_pred[l] = ai_pred[l] * (1 + (price / price_last - 1) * np.random.normal(1, 1, 1)[0])
                # print(ai_pred[l])
                # ai_pred[l] = ai_pred[l] + (price - price_last) * np.random.normal(1, 1, 1)[0]

        # ai control
        if (ai_rich[l] is True) and (ai_stock[l] == 0):
            # renew if a rich ai has sold all of its stock
            ai_pred[l] = np.random.lognormal(0, 0.1, size=num_ai)[0] / norm * price
            ai_money[l] = chi2.rvs(price_df, size=1)[0] / price_df * 20
            exchange = exchange + 1
            ai_rich[l] = False
        elif (ai_money[l] < price and ai_stock[l] == 0):
            # renew if a poor ai does not have enough wealth
            ai_pred[l] = np.random.lognormal(0, 0.1, size=num_ai)[0] / norm * price
            ai_money[l] = chi2.rvs(price_df, size=1)[0] / price_df * 20
            exchange = exchange + 1
        elif (ai_wealth[l] > 0.1 * sum(ai_wealth)):
            # mark an ai as rich if it owns 10% of the total wealth
            ai_pred[l] = 0
            ai_rich[l] = True
def getMarginalLikelihood(self):
    # Monte Carlo estimate with 100 draws from the prior
    priorMeans = norm.rvs(self.mu0, self.sigma0, size=100)
    priorVariances = 1. / chi2.rvs(self.v0, size=100)
    bentObservations = np.tile(self.observations, 100).reshape(len(self.observations), 100)
    # norm.logpdf expects a standard deviation, hence the square root;
    # note that only the first observation's log-likelihoods are used here
    likelihoodUnderPriorSamples = norm.logpdf(bentObservations, loc=priorMeans,
                                              scale=np.sqrt(priorVariances))[0]
    return np.exp(lse(likelihoodUnderPriorSamples) - np.log(100))
        if ai_buy_num[i] == 0:
            ai_buy_price[i] = small
        price = min_sell_price
        volume = volume + trade_num * 2
        trade_times = trade_times + 1
        if trade_times == 10:
            break

    ai_wealth = ai_stock * price + ai_money

    # total capital control
    mean_pred = np.mean(ai_pred)
    if mean_pred > price:
        ai_money = np.array(list(map(add, ai_money, chi2.rvs(1, size=num_ai))))
    else:
        ai_money = np.array(list(map(add, ai_money, -chi2.rvs(1, size=num_ai))))
    ai_money[ai_money < 0] = 0

    for l in range(num_ai):
        # ai pred renew
        r = np.random.uniform(0, 1, size=1)[0]
        if r < forget_ratio:
            # forget the previous guess
            ai_pred[l] = np.random.lognormal(0, 0.1, size=1)[0] / norm * price
            # ai_pred[l] = chi2.rvs(price_df, size=1)[0] / price_df * price
        else:
            # change according to the previous guess
            guess = ((ai_pred[l] - price_last) * (price - price_last) >= 0)
            if not guess:
import matplotlib.patches as mpatches

# k degrees of freedom
k = 5
th_mean, th_var, _, _ = chi2.stats(k, moments='mvsk')
print("THEORY MEAN=", th_mean, "THEORY VARIANCE=", th_var)

# prob dens function
# what X is at the 0.01 and 0.99 quantiles
start = chi2.ppf(0.01, k)
stop = chi2.ppf(0.99, k)

# theoretical dist
x = np.linspace(start, stop, 100)
plt.plot(x, chi2.pdf(x, k), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

# histogram of random nums from the dist
r = chi2.rvs(k, size=1000)
plt.hist(r, density=True, histtype='stepfilled', alpha=0.2)
plt.legend(loc='best', frameon=False)
plt.title('Theoretical Dist')
plt.ylabel('Prob Dens')
plt.xlabel('X')
plt.show()

# calculate means for 1000 experiments with n samples in each
def sample_mean(n):
    smeans = []
    for _ in range(1000):
        x = chi2.rvs(k, size=n)
        smeans.append(x.mean())
    return smeans
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

df = 5
rv = chi2(df)

# Generate a sample of size 1000 from the distribution
sampleRange = chi2.rvs(df, size=1000)

# Plot a histogram of the sample and draw the theoretical probability density
# of the random variable on top of it.
# plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples')
# plt.ylabel('number of samples')
# plt.xlabel('$x$')

# theoretical probability density of the random variable
left = chi2.ppf(0.01, df)
right = chi2.ppf(0.99, df)
x = np.linspace(left, 20, 100)
# plt.plot(x, rv.pdf(x), 'r-', lw=5, alpha=0.7, label='chi2 pdf')
plt.legend(loc='best')
# plt.show()

# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print(values)
# plt.hist(values.mean(axis=1), density=True)

m = []
# for _ in range(20):
#     m.append(np.mean(chi2.rvs(df, size=1000)))