def LaplaceBoxplotTukey():
    tips, result = [], []
    for size in sizes:
        count = 0  # reset per size; accumulating across sizes would skew the outlier share
        for i in range(NUMBER_OF_REPETITIONS):
            distribution = laplace.rvs(size=size, scale=1 / m.sqrt(2), loc=0)
            distribution.sort()
            count += count_out(distribution)
        result.append(count / (size * NUMBER_OF_REPETITIONS))
        # one extra sample per size for the boxplot itself
        distribution = laplace.rvs(size=size, scale=1 / m.sqrt(2), loc=0)
        distribution.sort()
        tips.append(distribution)
    DrawBoxplot(tips, LAPLACE)
    printAnswer(result)
def add_laplace_noise_time(aggregate_type, dfg_time, epsilon_time):
    laplace_mechanism = privacyMechanisms.LaplaceBoundedDomain()  # unused here

    # calculate sensitivity based on the type of aggregate
    if aggregate_type == AggregateType.AVG:
        sens_time = 1.0 / len(dfg_time.keys())
    elif aggregate_type in (AggregateType.MAX, AggregateType.MIN, AggregateType.SUM):
        sens_time = 1
    else:
        # a bare `assert "..."` is always true and never fires; raise instead
        raise ValueError("Wrong aggregate type")

    # calculate the DFG for the time
    dfg_time = calculate_time_dfg(dfg_time, aggregate_type)
    dfg_time_new = Counter()

    if not isinstance(epsilon_time, float):
        # multiple epsilon values for the time dfg
        for key in dfg_time.keys():
            if epsilon_time[key] in (inf, -inf) or epsilon_time[key] < 1e-11:
                dfg_time_new[key] = dfg_time[key]
            else:
                noise = laplace.rvs(loc=0, scale=sens_time / epsilon_time[key], size=1)[0]
                dfg_time_new[key] = dfg_time[key] + abs(noise)
    else:
        # single epsilon value for the entire time dfg
        for key in dfg_time.keys():
            # in case epsilon is inf, we don't need to add noise
            if epsilon_time == inf:
                dfg_time_new[key] = dfg_time[key]
            else:
                noise = laplace.rvs(loc=0, scale=sens_time / epsilon_time, size=1)[0]
                dfg_time_new[key] = dfg_time[key] + abs(noise)

    return dfg_time, dfg_time_new
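# The helper above instantiates the standard Laplace mechanism: noise drawn
# with scale = sensitivity / epsilon, added to the true value. Below is a
# minimal, self-contained sketch of that idea (the function name and values
# are illustrative, not from the repository):
from scipy.stats import laplace

def laplace_mechanism(true_value, sensitivity, epsilon):
    # scale = sensitivity / epsilon yields epsilon-differential privacy
    return true_value + laplace.rvs(loc=0, scale=sensitivity / epsilon)

print(laplace_mechanism(42.0, sensitivity=1.0, epsilon=0.5))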
def obtain_ell(self, p=0):
    if p == 0.99499:
        p = 0.995 * 0.85
        p = 0.99575  # note: immediately overwrites the previous assignment
    m = self.args.m
    eps = self.args.epsilon
    delta = 1 / self.args.n ** 2
    b = eps / (2 * math.log(1 / delta))
    noise = 'lap'
    beta_lt = 0.3 * 0.02
    ss, p_percentile = self.obtain_ss(m, p, b)
    if noise == 'lap':
        inv_cdf = laplace.ppf(1 - beta_lt)
        a = eps / 2
        ns = laplace.rvs()
    else:
        inv_cdf = norm.ppf(1 - beta_lt)
        a = eps / math.sqrt(-math.log(delta))
        ns = norm.rvs()
    kappa = 1 / (1 - (math.exp(b) - 1) * inv_cdf / a)
    tau = p_percentile + kappa * ss / a * (ns + inv_cdf)
    return max(0, tau)
def laprnd(loc, scale):
    # draw a single Laplace(loc, scale) variate
    # (unused imports of sympy, math, and matplotlib removed)
    from scipy.stats import laplace
    return laplace.rvs(loc, scale, None)
def main():
    file.write("Normal distribution\n")
    run(np.random.normal(0, 1, size=100), 100)
    file.write("\n\nLaplace distribution\n")
    run(laplace.rvs(size=20, scale=1 / math.sqrt(2), loc=0), 20)
    file.write("\n\nUniform distribution\n")
    run(uniform.rvs(size=20, loc=-math.sqrt(3), scale=2 * math.sqrt(3)), 20)
def sample_gaussian_vs_laplace(n=220, mu=0.0, sigma2=1, b=np.sqrt(0.5)):
    '''
    Use scipy to generate samples from two distributions: Laplace and normal.
    Note that norm.rvs takes the standard deviation as `scale`; with the
    default sigma2=1 the two coincide.
    '''
    X = norm.rvs(size=n, loc=mu, scale=sigma2)
    Y = laplace.rvs(size=n, loc=mu, scale=b)
    return X, Y
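# Why b = sqrt(0.5): a Laplace(mu, b) distribution has variance 2*b**2, so
# b = sqrt(0.5) matches the unit-variance normal draws above. A quick
# standalone check (the sample size is arbitrary):
import numpy as np
from scipy.stats import norm, laplace

X = norm.rvs(size=100000, loc=0.0, scale=1.0)
Y = laplace.rvs(size=100000, loc=0.0, scale=np.sqrt(0.5))
print(np.var(X), np.var(Y))  # both should be close to 1.0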
def distributions(size):
    n = norminvgauss.rvs(1, 0, size=size)
    l = laplace.rvs(size=size, scale=1 / m.sqrt(2), loc=0)
    p = poisson.rvs(10, size=size)
    c = cauchy.rvs(size=size)
    u = uniform.rvs(size=size, loc=-m.sqrt(3), scale=2 * m.sqrt(3))
    counted_distributions = [n, l, p, c, u]
    return counted_distributions
def futurePrice(self, days, strike, flag='C', model='laplace'):
    if 'laplace' in model:
        changes = laplace.rvs(0, self.var, size=days)
    elif 'norm' in model:
        changes = norm.rvs(0, self.var, size=days)
    values = exp(changes)
    self.daily = [float(self.price) * prod(values[0:i + 1])
                  for i in range(len(values))]
    self.bs = [BlackScholes(flag, float(price), float(strike), .005, self.vol,
                            float(days - i) / 365)
               for i, price in enumerate(self.daily)]
def laplaceFunc():
    for i in range(len(size)):
        n = size[i]
        fig, ax = plt.subplots(1, 1)
        ax.set_title("Laplace distribution, n = " + str(n))
        x = np.linspace(laplace.ppf(0.01), laplace.ppf(0.99), 100)
        ax.plot(x, laplace.pdf(x), 'b-', lw=5, alpha=0.6)
        r = laplace.rvs(size=n)
        ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
        plt.show()
def get_two_distributions():
    n = 500
    mu = 0.0
    sigma = 1
    b = np.sqrt(0.5)
    x = norm.rvs(size=n) * np.sqrt(sigma) + mu
    y = laplace.rvs(size=n, loc=mu, scale=b)
    return x, y
def AddLapNoise2(realNum, scal):
    from scipy.stats import laplace
    x = realNum + laplace.rvs(scale=scal)  # scalar draw; size=1 would return an array
    # clamping at zero is safe: post-processing preserves differential privacy
    if x < 0:
        re = 0
    else:
        re = round(x)
    return re
def laplace_distribution(select_size, scale=1 / m.sqrt(2), loc=0, asked=rvs, x=0):
    if asked == rvs:
        return laplace.rvs(size=select_size, scale=scale, loc=loc)
    elif asked == pdf:
        return laplace.pdf(x, loc=loc, scale=scale)
    elif asked == cdf:
        return laplace.cdf(x, loc=loc, scale=scale)
    return None
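# The dispatcher above forwards to scipy's loc/scale convention; a standalone
# equivalent using scipy.stats.laplace directly (scale = 1/sqrt(2) gives unit
# variance, matching the snippet's default):
import math
from scipy.stats import laplace

samples = laplace.rvs(size=1000, scale=1 / math.sqrt(2), loc=0)
density_at_zero = laplace.pdf(0, loc=0, scale=1 / math.sqrt(2))
print(density_at_zero)  # 1 / (2 * scale) = sqrt(2) / 2, about 0.707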
def __call__(self, shape):
    if self.apply_scale:
        sqrt_n = np.sqrt(shape[1])
    else:
        sqrt_n = 1
    D = laplace.rvs(size=shape)
    # Scale to unit variance
    D = D / np.sqrt(2)
    # Return correctly scaled version of D
    return D / sqrt_n * self.std + self.mu / sqrt_n ** 2
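# The division by sqrt(2) above relies on scipy's standard Laplace (scale=1)
# having variance 2. A quick standalone check of that assumption:
import numpy as np
from scipy.stats import laplace

D = laplace.rvs(size=(1000, 1000)) / np.sqrt(2)
print(np.var(D))  # should be close to 1.0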
def add_laplace_noise_freq(dfg_freq, epsilon_freq):
    sensitivity_freq = 1  # frequency queries have sensitivity 1
    dfg_freq_new = Counter()
    for key in dfg_freq.keys():
        if isinstance(epsilon_freq, float):
            # single epsilon value
            dfg_freq_new[key] = dfg_freq[key] + abs(
                laplace.rvs(loc=0, scale=sensitivity_freq / epsilon_freq, size=1)[0])
        else:
            # one epsilon value per key
            dfg_freq_new[key] = dfg_freq[key] + abs(
                laplace.rvs(loc=0, scale=sensitivity_freq / epsilon_freq[key], size=1)[0])
    return dfg_freq_new
def visualise_distribution_test_statistic(self, alpha=0.05):
    num_samples = 500

    # we first sample the null distribution
    null_samples = self._mmd.sample_null()

    # we then sample the alternative distribution, generating new data each time
    alt_samples = np.zeros(num_samples)
    for i in range(num_samples):
        x = norm.rvs(size=self._n, loc=self._mu, scale=self._sigma_squared)
        y = laplace.rvs(size=self._n, loc=self._mu, scale=self._b)
        feat_p = sg.RealFeatures(np.reshape(x, (1, len(x))))
        feat_q = sg.RealFeatures(np.reshape(y, (1, len(y))))
        kernel_width = 1
        kernel = sg.GaussianKernel(10, kernel_width)
        mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feat_p)
        mmd.set_q(feat_q)
        alt_samples[i] = mmd.compute_statistic()

    plt.figure(figsize=(18, 5))

    plt.subplot(131)
    plt.hist(null_samples, 50, color='blue')
    plt.title('Null distribution')

    plt.subplot(132)
    plt.title('Alternative distribution')
    plt.hist(alt_samples, 50, color='green')

    plt.subplot(133)
    plt.hist(null_samples, 50, color='blue')
    plt.hist(alt_samples, 50, color='green', alpha=0.5)
    plt.title('Null and alternative distribution')

    # find the (1-alpha) element of the null distribution
    null_samples_sorted = np.sort(null_samples)
    quantile_idx = int(len(null_samples) * (1 - alpha))
    quantile = null_samples_sorted[quantile_idx]
    plt.axvline(x=quantile, ymin=0, ymax=100, color='red',
                label=str(int(round((1 - alpha) * 100))) + '% quantile of null')
    plt.show()
    return self
def lap(packets, lap_list, eps):
    i = len(lap_list)
    g = su.cal_g(i)
    if i == 1 or i == su.cal_d(i):
        r = int(laplace.rvs(0, 1 / eps))
    else:
        num = int(log(i, 2))
        r = int(laplace.rvs(0, num / eps))
    x = lap_list[g][1] + (packets[i][1] - packets[g][1]) + r
    # clamp the padded packet size to [0, 1500]
    if x > 1500:
        x = 1500
    if x < 0:
        x = 0
    n = [packets[-1][0], x, packets[-1][2]]
    return n, x - packets[-1][1]
def selection(mu, sigma, size, distribution):
    if distribution == Distribution.NORMAL:
        return norm.rvs(mu, sigma, size)
    elif distribution == Distribution.CAUCHY:
        return cauchy.rvs(mu, sigma, size)
    elif distribution == Distribution.LAPLACE:
        return laplace.rvs(mu, sigma, size)
    elif distribution == Distribution.POISSON:
        return poisson.rvs(mu, size=size)
    elif distribution == Distribution.UNIFORM:
        return uniform.rvs(mu, sigma, size)
    else:
        return None
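# Hypothetical usage of selection(), assuming the Distribution enum defined
# elsewhere in this module; mu and sigma map onto scipy's positional loc and
# scale arguments (for POISSON, mu is the rate and sigma is ignored):
#   sample = selection(0, 1, 100, Distribution.LAPLACE)  # 100 Laplace(0, 1) draws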
def Laplace():
    for s in size:
        den = laplace(scale=1 / m.sqrt(2), loc=0)
        hist = laplace.rvs(size=s, scale=1 / m.sqrt(2), loc=0)
        fig, ax = plt.subplots(1, 1)
        ax.hist(hist, density=True, alpha=0.6)
        x = np.linspace(den.ppf(0.01), den.ppf(0.99), 100)
        ax.plot(x, den.pdf(x), LINE_TYPE, lw=1.5)
        ax.set_xlabel("LAPLACE")
        ax.set_ylabel("DENSITY")
        ax.set_title("SIZE: " + str(s))
        plt.grid()
        plt.show()
def laplaceNumbers():
    for size in sizes:
        fig, ax = plt.subplots(1, 1)
        param = 1 / math.sqrt(2)
        ax.hist(laplace.rvs(size=size, scale=param, loc=0),
                histtype='stepfilled', alpha=0.5, color='blue', density=True)
        x = np.linspace(laplace(scale=param, loc=0).ppf(0.01),
                        laplace(scale=param, loc=0).ppf(0.99), 100)
        ax.plot(x, laplace(scale=param, loc=0).pdf(x), '-')
        ax.set_title('LaplaceNumbers n = ' + str(size))
        ax.set_xlabel('LaplaceNumbers')
        ax.set_ylabel('density')
        plt.grid()
        plt.show()
def test_train(self):
    gsm = GSM(1, 10)
    gsm.train(laplace.rvs(size=[1, 10000]), max_iter=100, tol=-1)

    p = kstest(gsm.sample(10000).flatten(), laplace.cdf)[1]

    # test whether GSM faithfully reproduces Laplace samples
    self.assertTrue(p > 0.0001)

    gsm = GSM(1, 6)
    gsm.train(cauchy.rvs(size=[1, 10000]), max_iter=100, tol=-1)

    # test for stability of training
    self.assertTrue(not any(isnan(gsm.scales)))
def price(futurePrice, days, strike, flag='C', model='laplace'):
    # size must be an integer; days * 5 / 7 is a float in Python 3
    if 'laplace' in model:
        changes = laplace.rvs(0, futurePrice.var, size=int(days * 5 / 7))
    elif 'norm' in model:
        changes = norm.rvs(0, futurePrice.var, size=int(days * 5 / 7))
    values = exp(changes)
    daily = [float(futurePrice.price) * prod(values[0:i + 1])
             for i in range(len(values))]
    bs = [BlackScholes(flag, float(price), strike, .005, futurePrice.vol,
                       float(days - i) / 365)
          for i, price in enumerate(daily)]
    return sum(bs) / len(bs)
def variates(sizes):
    rvs = [[], [], [], [], []]
    for size in sizes:
        rvs[0].append(numpy.sort(norm.rvs(loc=0, scale=1, size=size)))
        rvs[1].append(numpy.sort(laplace.rvs(size=size, scale=1 / numpy.sqrt(2), loc=0)))
        rvs[2].append(numpy.sort(poisson.rvs(10, size=size)))
        rvs[3].append(numpy.sort(cauchy.rvs(size=size)))
        rvs[4].append(numpy.sort(uniform.rvs(size=size, loc=-numpy.sqrt(3),
                                             scale=2 * numpy.sqrt(3))))
    return rvs
def lap_trace(packets, lap_list, eps):
    i = len(lap_list)
    g = su.cal_g(i)
    if i == 1 or i == su.cal_d(i):
        r = int(laplace.rvs(0, 1 / eps))
    else:
        num = int(log(i, 2))
        r = int(laplace.rvs(0, num / eps))
    x = lap_list[g] + (packets[i] - packets[g]) + r
    # unlike lap(), only the lower bound is enforced here
    if x < 0:
        x = 0
    n = x
    return n, x - packets[i]
def Hist_Laplace():
    # parameters
    laplace_scale = 1 / math.sqrt(2)
    laplace_loc = 0
    laplace_label = "Laplace distribution"
    laplace_color = "blue"
    for size in selection_size:
        fig, ax = plt.subplots(1, 1)
        pdf = laplace(scale=laplace_scale, loc=laplace_loc)
        random_values = laplace.rvs(size=size, scale=laplace_scale, loc=laplace_loc)
        ax.hist(random_values, density=True, histtype=HIST_TYPE,
                alpha=hist_visibility, color=laplace_color)
        Create_plot(ax, laplace_label, pdf, size)
def initialize(self, method='laplace'):
    # fit mixture of Gaussians to Laplace
    mog = MoGaussian(num_components=self.marginals[0].num_components)

    if method.lower() == 'laplace':
        mog.train(laplace.rvs(size=[1, 10000]), max_iter=100)
    elif method.lower() == 'student':
        mog.train(t.rvs(1, size=[1, 10000]), max_iter=100)
    else:
        raise ValueError('Unknown initialization method \'{0}\'.'.format(method))

    for m in self.marginals:
        m.priors = mog.priors.copy()
        m.scales = mog.scales.copy()
        m.means = mog.means.copy()
def AddLapNoise4DenseMatrix(X, scal):
    # get matrix dimensions
    from scipy.sparse import csr_matrix
    m = csr_matrix(X).shape[0]
    n = csr_matrix(X).shape[1]
    # generate Laplace noise array
    from scipy.stats import laplace
    data = laplace.rvs(scale=scal, size=m * n)
    # generate Laplace noise matrix
    col = list(range(n)) * m  # list() needed in Python 3, where range * int fails
    row = []
    for i in range(m):
        row = row + [i] * n
    A = csr_matrix((data, (row, col)), shape=(m, n))
    # add noise to real data
    return Smooth(A + X)
def laplace_numbers():
    bins_num = [8, 12, 20]
    default_left_boundary = -5
    default_right_boundary = 5
    fig, axs = plt.subplots(len(units))
    for i in range(len(units)):
        samples = laplace.rvs(scale=2 ** (-0.5), loc=0, size=units[i])
        left_boundary = min(default_left_boundary, min(samples))
        right_boundary = max(default_right_boundary, max(samples))
        axs[i].grid()
        sns.histplot(samples, stat="density", bins=bins_num[i],
                     color='salmon', ax=axs[i])
        x = np.linspace(left_boundary, right_boundary, 1000)
        y = laplace(scale=2 ** (-0.5), loc=0).pdf(x)
        axs[i].plot(x, y, 'k', lw=2)
        axs[i].set_xlabel("laplaceNumbers (" + str(units[i]) + " samples)")
    fig.subplots_adjust(hspace=0.75)
    fig.savefig("laplaceNumbers.pdf")
    fig.show()
def test_posterior(self):
    gsm = GSM(1, 10)
    gsm.train(laplace.rvs(size=[1, 10000]), max_iter=100, tol=-1)

    samples = gsm.sample(100)
    posterior = gsm.posterior(samples)

    # simple sanity checks
    self.assertEqual(posterior.shape[0], gsm.scales.shape[0])
    self.assertEqual(posterior.shape[1], samples.shape[1])

    priors = rand(gsm.num_scales) + .1
    priors = priors / sum(priors)
    gsm.priors = priors

    avgpost = mean(gsm.posterior(gsm.sample(1000000)), 1)

    # test whether average posterior equals prior
    self.assertLess(max(abs(avgpost / priors - 1.)), 0.01)
def test_energy_gradient(self):
    gsm = GSM(2, 10)
    gsm.train(randn(2, 10000) * laplace.rvs(size=[1, 10000]), max_iter=100, tol=-1)

    samples = gsm.sample(100)
    gradient = gsm.energy_gradient(samples)

    # simple sanity checks
    self.assertEqual(gradient.shape[0], samples.shape[0])
    self.assertEqual(gradient.shape[1], samples.shape[1])

    f = lambda x: gsm.energy(x.reshape(-1, 1)).flatten()
    df = lambda x: gsm.energy_gradient(x.reshape(-1, 1)).flatten()

    for i in range(samples.shape[1]):
        relative_error = check_grad(f, df, samples[:, i]) / sqrt(sum(square(df(samples[:, i]))))

        # comparison with numerical gradient
        self.assertLess(relative_error, 0.001)
def LaplaceNumbers():
    for size in sizes:
        mean_list, med_list, z_R_list, z_Q_list, z_tr_list = [], [], [], [], []
        all_list = [mean_list, med_list, z_R_list, z_Q_list, z_tr_list]
        for i in range(1000):
            distribution = laplace.rvs(size=size, scale=1 / m.sqrt(2), loc=0)
            distribution.sort()
            mean_list.append(np.mean(distribution))
            med_list.append(np.median(distribution))
            z_R_list.append(z_R(distribution, size))
            z_Q_list.append(z_Q(distribution, size))
            z_tr_list.append(z_tr(distribution, size))
        # report mean and variance for each location estimator
        for lis in all_list:
            E_1 = round(np.mean(lis), 6)
            D_1 = round(np.std(lis) ** 2, 6)
            print("n = ", size)
            print("E(z) = ", E_1)
            print("D(z) = ", D_1)
            print("E(z) - sqrt(D(z)) = ", round(E_1 - D_1 ** 0.5, 6))
            print("E(z) + sqrt(D(z)) = ", round(E_1 + D_1 ** 0.5, 6))
def run(display=True):
    m = 20
    n = 100
    mat = np.random.randn(m, n)
    ratio_zeros = 0.9
    x = np.random.randn(n) * (np.random.rand(n) > ratio_zeros)
    noise = 0.05 * laplace.rvs(size=m)
    y = mat.dot(x) + noise
    lambda_coef = 1.0

    # evaluate cost for the x value used to generate the data
    cost_gt = np.sum(np.abs(y - mat.dot(x))) + lambda_coef * np.sum(np.abs(x))
    print(f"cost gt ={cost_gt}")

    lp = SparseLP()
    x_id = lp.add_variables_array((n), lower_bounds=None, upper_bounds=None)
    lp.add_soft_linear_constraint_rows(
        cols=x_id[None, :],
        vals=mat,
        lower_bounds=y,
        upper_bounds=y,
        coef_penalization=1,
    )
    lp.add_soft_linear_constraint_rows(
        cols=x_id[:, None],
        vals=np.ones((n, 1)),
        lower_bounds=0,
        upper_bounds=0,
        coef_penalization=lambda_coef,
    )
    sol, duration = lp.solve("osqp")
    x_opt = sol[x_id]
    cost_opt = np.sum(np.abs(y - mat.dot(x_opt))) + lambda_coef * np.sum(np.abs(x_opt))
    print(f"cost gt ={cost_gt} cost opt ={cost_opt}")
    assert cost_opt <= cost_gt
def sample(self, X):
    # noiseless signal plus i.i.d. Laplace noise, one draw per row of X
    return self.noiseless(X) + laplace.rvs(scale=2, size=X.shape[0])
def __call__(self, options, pars, obs=None, trackobs=False):
    """Simulate process model to get predicted choice and sample size distributions"""

    ### Basic setup
    np.random.seed()
    N = pars.get('N', 10000)              # number of simulated trials
    max_T = int(pars.get('max_T', 1000))  # maximum sample size

    ### Stopping rules
    if self.stoprule == 'optional':
        threshold = pars.get('theta', 3)  # decision threshold (optional only)
        r = pars.get('r', 0)              # rate of boundary collapse (optional only)
        stop_T = None                     # fixed sample size
    elif self.stoprule == 'fixedT':
        stop_T = pars.get('stop_T', 2)
        max_T = stop_T
        threshold = 1000
    elif self.stoprule == 'fixedGeom':
        # geometric
        threshold = 1000
        p_stop_geom = pars.get('p_stop_geom')
        minss = pars.get('minsamplesize', 1)
        # sample size (not index), adjusted by minsamplesize
        stop_T = geom.rvs(p_stop_geom, size=N) + (minss - 1)
        # don't go past max_T
        stop_T[np.where(stop_T > max_T)[0]] = max_T

    ### Search
    # probability of sampling each option
    p_sample_H = pars.get('p_sample_H', .5)
    p_sample_L = 1 - p_sample_H

    # if p_switch is specified, it will be used to generate sequences of
    # observations (rather than p_sample_H and p_sample_L)
    p_switch = pars.get('p_switch', None)

    # are the first two samples drawn from different options?
    switchfirst = pars.get('switchfirst', False)

    ### Sequential weights
    # compute value and attentional weights for multinomial problems
    if self.problemtype == 'multinomial':
        if self.rdw is None:
            wopt = options
        else:
            wopt = self.rdw[pars['probid']]
        weights = np.array([cpt.pweight_prelec(option, pars) for option in wopt])
        values = np.array([cpt.value_fnc(option[:, 0], pars) for option in options])
        v = np.array([np.multiply(weights[i], values[i]) for i in range(len(options))])
        V = v.sum(axis=1)
        evar = np.array([np.dot(weights[i], values[i] ** 2) - np.sum(v[i]) ** 2
                         for i in range(len(options))])
        sigma2 = np.max([np.sum(evar), 1e-10])
        sigma2mean = np.max([np.mean(evar), 1e-10])

        # sequential weights
        omega = []
        for i, option in enumerate(options):
            omega.append(weights[i] / option[:, 1])
        omega = np.array(omega)
        omega[np.isnan(omega)] = 0
        w_outcomes = np.array([np.multiply(omega[i], values[i]) for i in range(len(options))])

    elif self.problemtype == 'normal':
        if 'pow_gain' in pars:
            w_options = np.array([[0, 0], [0, 0]])
            for i in range(2):
                ev, evar = cpt.normal_raised_to_power(options[i], pars['pow_gain'])
                w_options[i] = np.array([ev, evar])
            sigma2 = w_options[:, 1].sum()
            evar = w_options[:, 1]
        else:
            evar = options[:, 1]
            sigma2 = options[:, 1].sum()
            sigma2mean = options[:, 1].mean()

    # scale by variance
    if 'sc' in pars:
        # raised to power
        sc = pars.get('sc')
        variance_scale = 1 / float(np.sqrt(sigma2) ** sc)
    elif 'sc2' in pars:
        # multiplicative
        sc = pars.get('sc2')
        variance_scale = 1 / float(np.sqrt(sigma2) * sc)
    elif 'sc0' in pars:
        sc0 = pars.get('sc0')
    elif 'sc_mean' in pars:
        sc = pars.get('sc_mean')
        variance_scale = 1 / float(np.sqrt(sigma2mean) ** sc)
    elif 'sc2_mean' in pars:
        sc = pars.get('sc2_mean')
        variance_scale = 1 / float(np.sqrt(sigma2mean) * sc)
    elif 'sc_x' in pars:
        variance_scale = pars.get('sc_x')
    else:
        variance_scale = 1

    ### Starting distribution
    Z = np.zeros(N)
    if 'tau' in pars:
        tau = pars.get('tau')
        Z = laplace.rvs(loc=0, scale=tau, size=N)
    elif 'tau_trunc' in pars:
        tau = pars.get('tau_trunc')
        dx = .001
        x = np.arange(-(threshold - dx), threshold, dx)
        p = laplace.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)
    elif 'tau_rel' in pars:
        tau = pars.get('tau_rel')
        tau = tau / variance_scale
        Z = laplace.rvs(loc=0, scale=tau, size=N)
    elif 'tau_rel_trunc' in pars:
        tau = pars.get('tau_rel_trunc')
        dx = .001
        x = np.arange(-1 + dx, 1, dx)
        p = laplace.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)
        Z = Z * threshold
    elif 'tau_unif' in pars:
        rng = pars.get('tau_unif', .001)
        Z = np.linspace(-rng, rng, num=N)
        np.random.shuffle(Z)
    elif 'tau_unif_rel' in pars:
        dx = .001
        rng = pars.get('tau_unif_rel', .001)
        Z = np.linspace(-(threshold - dx) * rng, (threshold - dx) * rng, num=N)
        np.random.shuffle(Z)
    elif 'tau_normal' in pars:
        tau = pars.get('tau_normal')
        Z = norm.rvs(loc=0, scale=tau, size=N)
    elif 'tau_normal_trunc' in pars:
        tau = pars.get('tau_normal_trunc')
        dx = .001
        x = np.arange(-(threshold - dx), threshold, dx)
        p = norm.pdf(x, loc=0, scale=tau)
        pn = p / p.sum()
        Z = np.random.choice(x, N, p=pn)

    ### Simulate
    if obs is not None:
        # assume a single sequence of known observations
        sampled_option = obs['option'].values
        outcomes = obs['outcome'].values
        max_T = outcomes.shape[0]
        sgn = 2 * sampled_option - 1
        sv = np.zeros(outcomes.shape)
        if self.problemtype == 'normal':
            c = pars.get('c', 0)
            # add weighting and criterion here
            sv = cpt.value_fnc(outcomes - c, pars)
        elif self.problemtype == 'multinomial':
            for i, opt in enumerate(options):
                for j, x in enumerate(opt):
                    ind = np.where((sampled_option == i) & (outcomes == x[0]))[0]
                    sv[ind] = w_outcomes[i][j]
        sv = np.multiply(sv, sgn)
        sampled_option = np.tile(sampled_option, (N, 1))
        outcomes = np.tile(outcomes, (N, 1))
        sv = np.tile(sv, (N, 1))
    elif self.choicerule == 'random':
        sv = np.zeros((N, max_T))
        sampled_option = None
        outcomes = None
    else:
        # otherwise, simulate sampling from options
        if False and not trackobs and self.problemtype == 'multinomial' and p_switch is None:
            # (disabled shortcut path)
            sampled_option = None
            outcomes = None
            valence = deepcopy(w_outcomes)
            valence[0] = -1 * valence[0]
            valence = valence.ravel()
            p = deepcopy(options[:, :, 1])
            p[0] = p_sample_L * p[0]
            p[1] = p_sample_H * p[1]
            p = p.ravel()
            sv = np.random.choice(valence, p=p, size=(N, max_T))

            # ensure that both options are sampled at least once
            if switchfirst:
                first = np.random.binomial(1, .5, size=N)
                second = 1 - first
                first2 = np.transpose((first, second))
                sampled_A = first2 == 0
                sampled_B = first2 == 1
                observed_A = np.random.choice(range(len(w_outcomes[0])),
                                              size=sampled_A.sum(), p=options[0][:, 1])
                observed_B = np.random.choice(range(len(w_outcomes[1])),
                                              size=sampled_B.sum(), p=options[1][:, 1])
                # subjective weighting
                sv2 = np.zeros((N, 2))
                sv2[sampled_A] = -1 * w_outcomes[0][observed_A]
                sv2[sampled_B] = w_outcomes[1][observed_B]
                sv[:, :2] = sv2
        else:
            # which option was sampled
            sampled_option = np.zeros((N, max_T), int)
            if p_switch is None:
                # ignore switching, just search based on [p_sample_H, p_sample_L]
                sampled_option = np.random.binomial(1, p_sample_H, size=(N, max_T))
            else:
                # generate search sequences based on p_switch
                switches = np.random.binomial(1, p_switch, size=(N, max_T - 1))
                sampled_option[:, 0] = np.random.binomial(1, .5, size=N)
                for i in range(max_T - 1):
                    switch_i = switches[:, i]
                    sampled_option[:, i + 1] = np.abs(sampled_option[:, i] - switch_i)

            # ensure both options sampled at least once
            if switchfirst:
                first = np.random.binomial(1, .5, size=N)
                sampled_option[:, 0] = first
                sampled_option[:, 1] = 1 - first

            sampled_A = sampled_option == 0
            sampled_B = sampled_option == 1
            N_sampled_A = sampled_A.sum()
            N_sampled_B = sampled_B.sum()

            # observation matrix - which outcome occurred (by index)
            observed = np.zeros((N, max_T), int)
            if self.problemtype == 'multinomial':
                observed_A = np.random.choice(range(len(w_outcomes[0])),
                                              size=sampled_A.sum(), p=options[0][:, 1])
                observed_B = np.random.choice(range(len(w_outcomes[1])),
                                              size=sampled_B.sum(), p=options[1][:, 1])
                observed[sampled_A] = observed_A
                observed[sampled_B] = observed_B

            # record outcomes experienced (by value)
            outcomes = np.zeros((N, max_T))
            if self.problemtype == 'multinomial':
                obj_outcomes = options[:, :, 0]
                # note weighting already done above
                outcomes[sampled_A] = w_outcomes[0][observed_A]
                outcomes[sampled_B] = w_outcomes[1][observed_B]
                outcomes_A = outcomes[sampled_A]
                outcomes_B = outcomes[sampled_B]
            else:
                A, B = options
                sigmaA = np.sqrt(A[1])
                sigmaB = np.sqrt(B[1])
                # conversion required by scipy.stats.truncnorm
                lowerA, upperA = (X_MIN - A[0]) / sigmaA, (X_MAX - A[0]) / sigmaA
                lowerB, upperB = (X_MIN - B[0]) / sigmaB, (X_MAX - B[0]) / sigmaB
                outcomes_A = np.round(truncnorm.rvs(lowerA, upperA, loc=A[0],
                                                    scale=sigmaA, size=N_sampled_A))
                outcomes_B = np.round(truncnorm.rvs(lowerB, upperB, loc=B[0],
                                                    scale=sigmaB, size=N_sampled_B))
                outcomes[sampled_A] = outcomes_A
                outcomes[sampled_B] = outcomes_B
                if 'pow_gain' in pars:
                    outcomes = cpt.value_fnc(outcomes, pars)
                    outcomes_A = cpt.value_fnc(outcomes_A, pars)
                    outcomes_B = cpt.value_fnc(outcomes_B, pars)

            # comparison
            sv = np.zeros((N, max_T))

            # criteria for each option
            if 'c' in pars:
                # compare to constant
                c = pars.get('c')
                c_A = c * np.ones(outcomes_A.shape)
                c_B = c * np.ones(outcomes_B.shape)
            elif 'c_0' in pars:
                # compare to sample mean
                c_0 = pars.get('c_0', 45)
                sum_A = np.cumsum(np.multiply(sampled_A, outcomes), axis=1)
                N_A = np.cumsum(sampled_A, axis=1, dtype=float)
                mn_A = np.multiply(sum_A, 1 / N_A)
                mn_A[np.isnan(mn_A)] = c_0
                sum_B = np.cumsum(np.multiply(sampled_B, outcomes), axis=1)
                N_B = np.cumsum(sampled_B, axis=1, dtype=float)
                mn_B = np.multiply(sum_B, 1 / N_B)
                mn_B[np.isnan(mn_B)] = c_0
                compA = np.multiply(outcomes - mn_B, sampled_A)
                compB = np.multiply(outcomes - mn_A, sampled_B)
            else:
                # (default) compare to true (weighted) mean of other option
                if self.problemtype == 'multinomial':
                    A, B = V
                elif self.problemtype == 'normal':
                    if 'pow_gain' in pars:
                        A, B = w_options[:, 0]
                    else:
                        A, B = options[:, 0]
                c_A = B * np.ones(outcomes_A.shape)
                c_B = A * np.ones(outcomes_B.shape)

            # combine
            if 'c_0' in pars:
                sv = (-1 * compA) + compB
            else:
                sv[sampled_A] = -1 * (outcomes_A - c_A)
                sv[sampled_B] = (outcomes_B - c_B)

            if 'sc0' in pars:
                # for any options with a variance of zero, replace with sc0
                evar[evar == 0.] = sc0
                # scaling factor for each option depends on its variance
                sc_A, sc_B = 1 / np.sqrt(evar)
                sv[sampled_A] = sv[sampled_A] * sc_A
                sv[sampled_B] = sv[sampled_B] * sc_B
            else:
                # fixed scaling factor across all options
                sv = sv * variance_scale

    # noise
    if 'c_sigma' in pars:
        c_sigma = pars.get('c_sigma')
        err = np.random.normal(loc=0, scale=c_sigma, size=outcomes.shape)
    elif 'dv_sigma' in pars:
        dv_sigma = pars.get('dv_sigma')
        err = np.random.normal(loc=0, scale=dv_sigma, size=N)
        err = np.tile(err, (max_T, 1)).transpose()
    else:
        err = np.zeros(outcomes.shape)
    sv = sv + err

    ### Accumulation
    # add starting states to first outcome
    sv[:, 0] = sv[:, 0] + Z

    # accumulate
    P = np.cumsum(sv, axis=1)

    ### Stopping
    if self.stoprule == 'optional':
        if r > 0:
            # collapsing boundaries
            threshold_min = .1
            upper = threshold_min * np.ones((N, max_T))
            dec = np.arange(threshold, threshold_min, -r * threshold)
            dec = dec[:max_T]
            upper[:, :dec.shape[0]] = np.tile(dec, (N, 1))
            lower = -threshold_min * np.ones((N, max_T))
            inc = np.arange(-threshold, -threshold_min, r * threshold)
            inc = inc[:max_T]
            lower[:, :inc.shape[0]] = np.tile(inc, (N, 1))
            crossed = -1 * (P < lower) + 1 * (P > upper)
        else:
            # fixed boundaries
            crossed = -1 * (P < -threshold) + 1 * (P > threshold)

        # if minimum sample size, prevent stopping
        minsamplesize = pars.get('minsamplesize', 1) - 1
        crossed[:, :minsamplesize] = 0

        # any trials where hit max_T, make decision based on
        # whether greater or less than zero
        nodecision = np.where(np.sum(np.abs(crossed), axis=1) == 0)[0]
        if len(nodecision) > 0:
            crossed[nodecision, max_T - 1] += 1 * (P[nodecision, max_T - 1] >= 0)
            crossed[nodecision, max_T - 1] += -1 * (P[nodecision, max_T - 1] < 0)
    elif self.stoprule == 'fixedT':
        crossed = np.zeros((N, stop_T), dtype=int)
        crossed[:, (stop_T - 1)] = np.sign(P[:, (stop_T - 1)])
        indifferent = np.where(crossed[:, (stop_T - 1)] == 0)[0]
        n_indifferent = len(indifferent)
        crossed[indifferent] = np.random.choice([-1, 1], p=[.5, .5], size=(n_indifferent, 1))
        assert np.sum(crossed[:, (stop_T - 1)] == 0) == 0
    elif self.stoprule == 'fixedGeom':
        crossed = np.zeros((N, max_T), dtype=int)
        crossed[range(N), stop_T - 1] = np.sign(P[range(N), stop_T - 1])
        indifferent = np.where(crossed[range(N), stop_T - 1] == 0)[0]
        n_indifferent = len(indifferent)
        t_indifferent = (stop_T - 1)[indifferent]
        crossed[indifferent, t_indifferent] = np.random.choice([-1, 1], p=[.5, .5],
                                                               size=n_indifferent)

    if obs is not None:
        p_stop_choose_A = np.sum(crossed == -1, axis=0) * (1 / float(N))
        p_stop_choose_B = np.sum(crossed == 1, axis=0) * (1 / float(N))
        p_sample = 1 - (p_stop_choose_A + p_stop_choose_B)
        return {'p_stop_choose_A': p_stop_choose_A,
                'p_stop_choose_B': p_stop_choose_B,
                'p_sample': p_sample,
                'traces': P}
    else:
        # samplesize is the **index** where threshold is crossed
        samplesize = np.sum(1 * (np.cumsum(np.abs(crossed), axis=1) == 0), axis=1)
        choice = (crossed[range(N), samplesize] + 1) / 2
        p_resp = choice.mean()
        ss_A = samplesize[choice == 0]
        ss_B = samplesize[choice == 1]
        p_stop_A = np.zeros(max_T)
        p_stop_B = np.zeros(max_T)
        p_stop_A_f = np.bincount(ss_A, minlength=max_T)
        p_stop_B_f = np.bincount(ss_B, minlength=max_T)
        if self.stoprule == 'optional' or self.stoprule == 'fixedGeom':
            if p_stop_A_f.sum() > 0:
                p_stop_A = p_stop_A_f / float(p_stop_A_f.sum())
            if p_stop_B_f.sum() > 0:
                p_stop_B = p_stop_B_f / float(p_stop_B_f.sum())
        elif self.stoprule == 'fixedT':
            p_stop_A[stop_T - 1] = 1
            p_stop_B[stop_T - 1] = 1
        assert (p_stop_A_f.sum() + p_stop_B_f.sum()) == N
        p_stop_cond = np.transpose([p_stop_A, p_stop_B])
        p_stop_cond[np.isnan(p_stop_cond)] = 0.
        f_stop_cond = np.transpose([p_stop_A_f, p_stop_B_f]) / float(N)

        # only include data up to choice
        outcome_ind = None
        traces = None
        if type(sampled_option) is np.ndarray and trackobs:
            sampled_option = [sampled_option[i][:(samplesize[i] + 1)]
                              for i in range(samplesize.shape[0])]
            outcomes = [outcomes[i][:(samplesize[i] + 1)]
                        for i in range(samplesize.shape[0])]
            traces = [P[i][:(samplesize[i] + 1)]
                      for i in range(samplesize.shape[0])]
            if self.problemtype == 'multinomial':
                outcome_ind = [observed[i][:(samplesize[i] + 1)]
                               for i in range(samplesize.shape[0])]

        return {'choice': choice,
                'samplesize': samplesize + 1,
                'p_resp': np.array([1 - p_resp, p_resp]),
                'p_stop_cond': p_stop_cond,
                'f_stop_cond': f_stop_cond,
                'sampled_option': sampled_option,
                'outcomes': outcomes,
                'outcome_ind': outcome_ind,
                'traces': traces,
                'Z': Z}
def _sample(self, loc, scale, k, size):
    # note: draws k variates; the `size` argument is accepted but unused
    return laplace.rvs(loc=loc, scale=scale, size=k)
def initialize(self, X=None, method='data'):
    """
    Initializes parameter values with more sensible values.

    @type  X: array_like
    @param X: data points stored in columns

    @type  method: string
    @param method: type of initialization ('data', 'gabor' or 'random')
    """

    if self.noise:
        L = self.A[:, :self.num_visibles]

    if method.lower() == 'data':
        # initialize features with data points
        if X is not None:
            if X.shape[1] < self.num_hiddens:
                raise ValueError('Number of data points too small.')
            else:
                # whitening matrix
                val, vec = eig(cov(X))

                # whiten data
                X_ = dot(dot(diag(1. / sqrt(val)), vec.T), X)

                # sort by norm in whitened space
                indices = argsort(sqrt(sum(square(X_), 0)))[::-1]

                # pick 25% largest data points and normalize
                X_ = X_[:, indices[:max([X.shape[1] // 4, self.num_hiddens])]]
                X_ = X_ / sqrt(sum(square(X_), 0))

                # pick first basis vector at random
                A = X_[:, [randint(X_.shape[1])]]

                for _ in range(self.num_hiddens - 1):
                    # pick vector with large angle to all other vectors
                    A = hstack([A, X_[:, [argmin(max(abs(dot(A.T, X_)), 0))]]])

                # orthogonalize and unwhiten
                A = dot(sqrtmi(dot(A, A.T)), A)
                A = dot(dot(vec, diag(sqrt(val))), A)

                self.A = A

    elif method.lower() == 'gabor':
        # initialize features with Gabor filters
        if self.subspaces[0].dim > 1 and not mod(self.num_hiddens, 2):
            for i in range(self.num_hiddens // 2):
                G = gaborf(self.num_visibles)
                self.A[:, 2 * i] = real(G)
                self.A[:, 2 * i + 1] = imag(G)
        else:
            for i in range(len(self.subspaces)):
                self.A[:, i] = gaborf(self.num_visibles, complex=False)

    elif method.lower() == 'random':
        # initialize with Gaussian white noise
        self.A = randn(self.num_visibles, self.num_hiddens)

    elif method.lower() in ['laplace', 'student', 'cauchy', 'exponpow']:
        if method.lower() == 'laplace':
            # approximate multivariate Laplace with GSM
            samples = randn(self.subspaces[0].dim, 10000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = laplace.rvs(size=[1, 10000]) * samples
        elif method.lower() == 'student':
            samples = randn(self.subspaces[0].dim, 50000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = t.rvs(2., size=[1, 50000]) * samples
        elif method.lower() == 'exponpow':
            exponent = 0.8
            samples = randn(self.subspaces[0].dim, 200000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = gamma(1. / exponent, 1., (1, 200000)) ** (1. / exponent) * samples
        else:
            samples = randn(self.subspaces[0].dim, 100000)
            samples = samples / sqrt(sum(square(samples), 0))
            samples = cauchy.rvs(size=[1, 100000]) * samples

        if self.noise:
            # ignore first subspace
            gsm = GSM(self.subspaces[1].dim, self.subspaces[1].num_scales)
            gsm.train(samples, max_iter=200, tol=1e-8)
            for m in self.subspaces[1:]:
                m.scales = gsm.scales.copy()
        else:
            # approximate distribution with GSM
            gsm = GSM(self.subspaces[0].dim, self.subspaces[0].num_scales)
            gsm.train(samples, max_iter=200, tol=1e-8)
            for m in self.subspaces:
                m.scales = gsm.scales.copy()

    else:
        raise ValueError('Unknown initialization method \'{0}\'.'.format(method))

    if self.noise:
        # don't overwrite noise covariance
        self.A[:, :self.num_visibles] = L
def sample_episodes(numepisodes, physics):
    episodes = []
    total_events, num_reasonable_events = 0, 0

    for epinum in range(numepisodes):  # Python 3: range replaces xrange
        events = []
        detections = []
        assocs = []

        # first generate all the events
        numevents = poisson.rvs(physics.lambda_e * 4 * pi * physics.R ** 2 * physics.T)

        for evnum in range(numevents):
            # longitude is uniform from -180 to 180
            evlon = uniform.rvs(-180, 360)
            # sin(latitude) is uniform from -1 to 1
            evlat = degrees(arcsin(uniform.rvs(-1, 2)))
            # magnitude has an exponential distribution as per Gutenberg-Richter law
            while True:
                evmag = expon.rvs(physics.mu_m, physics.theta_m)
                # magnitude saturates at some maximum value;
                # re-sample if we exceed the max
                if evmag > physics.gamma_m:
                    continue
                else:
                    break
            # time is uniform
            evtime = uniform.rvs(0, physics.T)

            event = Event(evlon, evlat, evmag, evtime)
            events.append(event)

            truedets = []

            # for each event generate its set of true detections
            for stanum, station in enumerate(STATIONS):
                dist = compute_distance((station.lon, station.lat),
                                        (event.lon, event.lat))
                sta_to_ev_az = compute_azimuth((station.lon, station.lat),
                                               (event.lon, event.lat))
                detprob = logistic(physics.mu_d0[stanum]
                                   + physics.mu_d1[stanum] * event.mag
                                   + physics.mu_d2[stanum] * dist)

                # is detected?
                if bernoulli.rvs(detprob):
                    dettime = laplace.rvs(event.time + compute_travel_time(dist)
                                          + physics.mu_t[stanum],
                                          physics.theta_t[stanum])
                    # Note: the episode only has detections within the first T
                    # seconds. Late arriving detections will not be available.
                    if dettime < physics.T:
                        degdiff = laplace.rvs(physics.mu_z[stanum], physics.theta_z[stanum])
                        detaz = (sta_to_ev_az + degdiff + 360) % 360
                        detslow = laplace.rvs(compute_slowness(dist) + physics.mu_s[stanum],
                                              physics.theta_s[stanum])
                        while True:
                            # resample if the detection amplitude is infinite
                            try:
                                detamp = exp(norm.rvs(physics.mu_a0[stanum]
                                                      + physics.mu_a1[stanum] * event.mag
                                                      + physics.mu_a2[stanum] * dist,
                                                      physics.sigma_a[stanum]))
                            except FloatingPointError:
                                continue
                            # disallow zero or infinite amplitudes
                            if detamp == 0 or isinf(detamp):
                                continue
                            break
                        truedets.append(len(detections))
                        detections.append(Detection(stanum, dettime, detaz,
                                                    detslow, detamp))

            assocs.append(truedets)

            total_events += 1
            if len(truedets) >= 2:
                num_reasonable_events += 1

        # now generate the false detections
        for stanum in range(len(STATIONS)):
            numfalse = poisson.rvs(physics.lambda_f[stanum] * physics.T)
            for dnum in range(numfalse):
                dettime = uniform.rvs(0, physics.T)
                detaz = uniform.rvs(0, 360)
                detslow = uniform.rvs(compute_slowness(180),
                                      compute_slowness(0) - compute_slowness(180))
                while True:
                    # resample if the detection amplitude is infinite
                    try:
                        detamp = exp(cauchy.rvs(physics.mu_f[stanum],
                                                physics.theta_f[stanum]))
                    except FloatingPointError:
                        continue
                    # disallow zero or infinite amplitudes
                    if detamp == 0 or isinf(detamp):
                        continue
                    break
                detections.append(Detection(stanum, dettime, detaz, detslow, detamp))

        episodes.append(Episode(events, detections, assocs))

    print("{:d} events generated".format(total_events))
    print("{:.1f} % events have at least two detections"
          .format(100 * num_reasonable_events / total_events))
    return episodes
# Plot a Laplace distribution with specified parameters.
import numpy as np
from scipy.stats import laplace
import matplotlib.pyplot as plt
import sys

t_green = '#009933'

# params
mu = float(sys.argv[1])
b = float(sys.argv[2])

# generate samples
# lots of samples are a lazy man's smoothing
lap_X = laplace.rvs(loc=mu, scale=b, size=1000000)
x = np.linspace(start=-8 * b, stop=8 * b, num=1000000)
lap_Q = laplace.cdf(x=x, loc=mu, scale=b)  # CDF, computed but not plotted below

# plotting pdf!
fig = plt.figure()
h = plt.hist(lap_X, bins=500, alpha=1.0, color=t_green,
             density=True,  # `normed` was removed from recent matplotlib
             histtype='stepfilled', antialiased=True, linewidth=0)

# to make the plot symmetric, find maximum distance to centre
lower_dx = mu - h[1][0]
upper_dx = h[1][-1] - mu
max_dx = np.max([abs(lower_dx), abs(upper_dx)])
plt.xlim(-max_dx + mu, max_dx + mu)

# labels etc.
plt.title('Laplace PDF with mu = ' + str(mu) + ' and b = ' + str(b),
          family='monospace', size=16)
plt.xlabel('x', family='monospace', size=16)
plt.ylabel('p(x)', family='monospace', size=16)
plt.show()
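# Assumed invocation (the script name is illustrative), passing mu and b on
# the command line as the script's two sys.argv parameters:
#   python plot_laplace.py 0.0 1.0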