def find_best_annualized_usage_params(target_annualized_usage, model, start_params,
                                      params_to_change, weather_normal_source, n_guesses=100):
    best_params = start_params
    meter = AnnualizedUsageMeter(model=model, temperature_unit_str=TEMPERATURE_UNIT_STR)
    best_result = meter.evaluate_raw(model_params=best_params,
                                     weather_normal_source=weather_normal_source)
    best_ann_usage = best_result["annualized_usage"][0]
    for n in range(n_guesses):
        resolution = abs((target_annualized_usage - best_ann_usage) / target_annualized_usage)
        param_dict = best_params.to_dict()
        for param_name, scale_factor in params_to_change:
            current_value = norm.rvs(param_dict[param_name], resolution * scale_factor)
            while current_value < 0:
                current_value = norm.rvs(param_dict[param_name], resolution * scale_factor)
            param_dict[param_name] = current_value
        model_params = model.param_type(param_dict)
        result = meter.evaluate_raw(model_params=model_params,
                                    weather_normal_source=weather_normal_source)
        ann_usage = result["annualized_usage"][0]
        if abs(target_annualized_usage - ann_usage) < abs(target_annualized_usage - best_ann_usage):
            best_params = model_params
            best_ann_usage = ann_usage
    return best_params, best_ann_usage
def simulate(d, c, N, S, decide=decide_yn):
    """Simulate data under the modified YN model.

    Args:
        d: Sensitivity.
        c: Bias.
        N: Number of noise trials.
        S: Number of signal trials.
        decide: Decision-rule function.

    Returns:
        [(f1, h1, m1, r1) ... ]
    """
    out = []
    for _d, _c, _N, _S in zip(d, c, N, S):
        k = _d / 2. + _c
        psi_0 = norm.rvs(0, 1, _N)
        rsp_0 = np.array([x for x in decide(psi_0, k)])
        r, f = [sum(rsp_0 == i) for i in range(2)]
        psi_1 = norm.rvs(_d, 1, _S)
        rsp_1 = np.array([x for x in decide(psi_1, k)])
        m, h = [sum(rsp_1 == i) for i in range(2)]
        out.append((f, h, m, r))
    return out
def get_rate(amount, month, period):
    rate = 0.0
    while rate < rate_min or rate > rate_max:
        rv = random.uniform(0, 1)
        if rv < 0.8:
            rate = int(norm.rvs(loc=65, scale=8)) / 1000.0
        elif rv < 0.9:
            rate = int(norm.rvs(loc=88, scale=2)) / 1000.0
        else:
            rate = int(norm.rvs(loc=35, scale=50)) / 1000.0
    # Adjust the rate by need (a low amount is forced to have a larger rate)
    if amount < amount_wm:
        rate = round(rate * amount_wm / amount, 3)
    # Set a low rate when the period is short
    rate = rate * (0.5 * period / period_max + 0.5 * amount_wm / amount)
    # Campaign
    if month % 100 >= 10:
        rate = rate + 0.035
    # Clamp to the allowed range
    rate = round(rate, 3)
    if rate < rate_min:
        rate = rate_min
    elif rate > rate_max:
        rate = rate_max
    return rate
def sim_regular_yn(d, c, N, S):
    """Simulate data under the modified YN model.

    Parameters
    ----------
    d : float
        Measure of sensitivity.
    c : float
        Measure of bias.
    N : int
        Number of trials with stimuli from the first class.
    S : int
        Number of trials with stimuli from the second class.

    Returns
    -------
    f : int
        Count of observed false alarms.
    h : int
        Count of observed hits.
    m : int
        Count of observed misses.
    r : int
        Count of observed correct rejections.
    """
    k = d / 2. + c
    psi_0 = norm.rvs(0, 1, N)
    rsp_0 = np.array([x for x in decide(psi_0, k)])
    r, f = [sum(rsp_0 == i) for i in range(2)]
    psi_1 = norm.rvs(d, 1, S)
    rsp_1 = np.array([x for x in decide(psi_1, k)])
    m, h = [sum(rsp_1 == i) for i in range(2)]
    return f, h, m, r
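# For reference, a minimal self-contained sketch of the same yes/no signal-detection
# simulation, assuming the missing `decide` helper is a simple threshold on the
# internal evidence (that helper is not shown in the snippets above).
import numpy as np
from scipy.stats import norm

def decide(psi, k):
    # 1 = "yes" (evidence exceeds the criterion), 0 = "no"
    return (psi > k).astype(int)

def sim_yn(d, c, N, S, seed=0):
    k = d / 2. + c
    noise = norm.rvs(0, 1, size=N, random_state=seed)        # noise trials
    signal = norm.rvs(d, 1, size=S, random_state=seed + 1)   # signal trials
    f = int(np.sum(decide(noise, k)))    # false alarms
    r = N - f                            # correct rejections
    h = int(np.sum(decide(signal, k)))   # hits
    m = S - h                            # misses
    return f, h, m, r

print(sim_yn(d=1.0, c=0.0, N=1000, S=1000))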
def simulate_stupidDPM(iter_num, M):
    # Generate mixture sample
    N = 1000
    mu = [0.0, 10.0, 3.0]
    components = np.random.choice(range(3), size=N, replace=True, p=[0.3, 0.5, 0.2])
    samples = [norm.rvs(size=1, loc=mu[components[i]], scale=1)[0] for i in range(N)]

    ## Sample G from DP(M, G0)
    v = beta.rvs(a=1.0, b=M, size=N)
    prob_vector = np.append(np.array(v[0]), v[1:] * np.cumprod(1.0 - v[:-1]))
    thetas = norm.rvs(size=N, loc=1.0, scale=1.0)

    ### Initialize thetas
    thetas = np.random.choice(thetas, size=N, replace=True, p=prob_vector)

    ### Start MCMC chain
    for i in range(iter_num):
        for j in range(N):
            theta_temp = np.append(thetas[:j], thetas[j+1:])
            p = np.append(norm.pdf(samples[j], loc=theta_temp, scale=1.0),
                          M * norm.pdf(samples[j], loc=1.0, scale=np.sqrt(2.0)))
            p = p / sum(p)
            temp = np.random.choice(np.append(theta_temp, N), size=1, replace=True, p=p)
            if temp == N:
                thetas[j] = norm.rvs(size=1, loc=0.5 * (samples[j] + 1), scale=np.sqrt(0.5))
            else:
                thetas[j] = temp
        print(thetas)
    return {"thetas": thetas, "y": samples}
def _test():
    '''Quick manual test of density_based on 2-D data with planted outliers.'''
    dim1_mean = 0
    dim1_std = 3
    dim2_mean = 20
    dim2_std = 3

    # 20 normal RVs with mean=0, std=3
    dim1 = list(norm.rvs(dim1_mean, dim1_std, size=20))
    # Add a couple of obvious outliers
    dim1.append(-10)
    dim1.append(10)

    dim2 = list(norm.rvs(dim2_mean, dim2_std, size=20))
    dim2.append(10)
    dim2.append(30)

    data = list(zip(dim1, dim2))
    confs = density_based(data)

    print('Dim1 params:', dim1_mean, dim1_std)
    print('Dim2 params:', dim2_mean, dim2_std)
    for d, conf in zip(data, confs):
        print(d, conf)
def test_simulated_correlations():
    # Get standard brain mask
    mr_directory = get_data_directory()
    standard = "%s/MNI152_T1_2mm_brain_mask.nii.gz" % (mr_directory)
    thresholds = [0.0, 0.5, 1.0, 1.5, 1.96, 2.0]

    # Generate random data inside brain mask, run 10 iterations
    standard = nibabel.load(standard)
    number_values = len(numpy.where(standard.get_data() != 0)[0])
    numpy.random.seed(9191986)
    for x in range(0, 10):
        data1 = norm.rvs(size=number_values)
        data2 = norm.rvs(size=number_values)
        corr = pearsonr(data1, data2)[0]

        # Put into faux nifti images
        mr1 = numpy.zeros(standard.shape)
        mr1[standard.get_data() != 0] = data1
        mr1 = nibabel.nifti1.Nifti1Image(mr1, affine=standard.get_affine(), header=standard.get_header())
        mr2 = numpy.zeros(standard.shape)
        mr2[standard.get_data() != 0] = data2
        mr2 = nibabel.nifti1.Nifti1Image(mr2, affine=standard.get_affine(), header=standard.get_header())

        pdmask = make_binary_deletion_mask([mr1, mr2])
        pdmask = nibabel.Nifti1Image(pdmask, header=mr1.get_header(), affine=mr1.get_affine())
        score = calculate_correlation(images=[mr1, mr2], mask=pdmask)
        assert_almost_equal(corr, score, decimal=5)
def reamostrar(particulas, n_particulas=num_particulas):
    """
    Resample the particles, returning new particles drawn according to their
    probability and displaced by a normal perturbation.

    The como_sortear notebook has hints that may be useful.

    After resampling, all particles must again be given equal probability.
    Use 1/n or 1; it does not matter as long as it is the same for all.
    """
    probs = [p.w for p in particulas]
    print("Probabilities:")
    print(probs)
    print("Sum of probabilities:")
    print(sum(probs))

    pfinal = draw_random_sample(particulas, probs, n_particulas)
    for p in pfinal:
        p.x += norm.rvs(scale=std_resample_x)
        p.y += norm.rvs(scale=std_resample_y)
        p.theta += norm.rvs(scale=std_resample_theta)
        p.w = 1.0
    return pfinal
def MakeSamples(parameters_A, parameters_B, percentage_A, TotalSize=200000):
    sizeA = int(percentage_A * TotalSize)
    sizeB = TotalSize - sizeA

    setA = []
    for mu0, sigma0 in parameters_A:
        setA.append(norm.rvs(loc=mu0, scale=sigma0, size=sizeA))
    setA = np.array(setA)
    ones = np.ones([1, setA.shape[1]])
    setA = np.transpose(np.append(setA, ones, axis=0))

    setB = []
    for mu0, sigma0 in parameters_B:
        setB.append(norm.rvs(loc=mu0, scale=sigma0, size=sizeB))
    setB = np.array(setB)
    zeros = np.zeros([1, setB.shape[1]])
    setB = np.transpose(np.append(setB, zeros, axis=0))

    npout = np.vstack([setA, setB])
    npout = np.concatenate((npout, percentage_A * np.ones([npout.shape[0], 1])), axis=1)
    return npout
def setup(self): ######### # PART 1: Make model calcium data ######### # Data parameters RATE = 1 # mean firing rate of poisson spike train (Hz) STEPS = 100 # number of time steps in data STEPS_LONG = 5000 # number of time steps in data TAU = 0.6 # time constant of calcium indicator (seconds) DELTAT = 1 / 30 # time step duration (seconds) self.sigma = 0.1 # standard deviation of gaussian noise SEED = 2222 # random number generator seed # Make a poisson spike trains self.spikes = sima.spikes.get_poisson_spikes(deltat=DELTAT, rate=RATE, steps=STEPS, seed=SEED) # longer time-series for parameter estimation self.spikes_long = sima.spikes.get_poisson_spikes(deltat=DELTAT, rate=RATE, steps=STEPS_LONG, seed=SEED) # Convolve with kernel to make calcium signal np.random.seed(SEED) self.gamma = 1 - (DELTAT / TAU) CALCIUM = signal.lfilter([1], [1, -self.gamma], self.spikes) CALCIUM_LONG = signal.lfilter([1], [1, -self.gamma], self.spikes_long) # Make fluorescence traces with random gaussian noise and baseline self.fluors = CALCIUM + norm.rvs(scale=self.sigma, size=STEPS) + uniform.rvs() self.fluors_long = CALCIUM_LONG + norm.rvs(scale=self.sigma, size=STEPS_LONG) + uniform.rvs()
def epsilon(self, asset): """Sample from the standard normal distribution for the given asset. For uncorrelated risk calculation jobs we sample the standard normal distribution for each asset. In the opposite case ("perfectly correlated" assets) we sample for each building typology i.e. two assets with the same typology will "share" the same standard normal distribution sample. Two assets are considered to be of the same building typology if their taxonomy is the same. The asset's `taxonomy` is only needed for correlated jobs and unlikely to be available for uncorrelated ones. """ correlation = getattr(self, "ASSET_CORRELATION", None) if not correlation: # Sample per asset return norm.rvs(loc=0, scale=1) elif correlation != "perfect": raise ValueError('Invalid "ASSET_CORRELATION": %s' % correlation) else: # Sample per building typology samples = getattr(self, "samples", None) if samples is None: # These are two references for the same dictionary. samples = self.samples = dict() taxonomy = asset.get("taxonomy") if taxonomy is None: raise ValueError("Asset %s has no taxonomy" % asset["assetID"]) if taxonomy not in samples: samples[taxonomy] = norm.rvs(loc=0, scale=1) return samples[taxonomy]
def __init__(self, d, w=None, rate=1.0):
    self.d = d
    if w is None:
        # Initialize the weight vector with small random values
        self.w = normal.rvs(loc=0, scale=0.1, size=d)
    else:
        self.w = w
    self.rate = rate
def get_rate(amount, month, period):
    rate = 0.0
    while rate < rate_min or rate > rate_max:
        rv = random.uniform(0, 1)
        if rv < 0.7:
            rate = int(norm.rvs(loc=65, scale=8)) / 1000.0
        elif rv < 0.9:
            rate = int(norm.rvs(loc=88, scale=2)) / 1000.0
        else:
            rate = int(norm.rvs(loc=35, scale=50)) / 1000.0
    # Set a low rate when the period is short
    rate = rate + rate * (amount_wm - amount) * period / amount_wm / period_max
    # Campaign
    if month % 100 >= 10:
        rate = rate + 0.015
    # Clamp to the allowed range
    rate = round(rate, 3)
    if rate < rate_min:
        rate = rate_min
    elif rate > rate_max:
        rate = rate_max
    return rate
def MakeMultiGSamples(parameters_A, parameters_B, percentage_A, percentage_A_Expected, TotalSize=200000):
    sizeA = int(percentage_A * TotalSize)
    sizeB = TotalSize - sizeA
    print(sizeA, sizeB)

    setA = []
    for feature in parameters_A:
        tmp_feat = np.array([])
        for (mu0, sigma0), percent in feature:
            tmp_feat = np.append(tmp_feat, norm.rvs(loc=mu0, scale=sigma0, size=int(sizeA * percent)))
        np.random.shuffle(tmp_feat)
        setA.append(tmp_feat)
    setA = np.array(setA)
    ones = np.ones([1, setA.shape[1]])
    setA = np.transpose(np.append(setA, ones, axis=0))

    setB = []
    for feature in parameters_B:
        tmp_feat = np.array([])
        for (mu0, sigma0), percent in feature:
            tmp_feat = np.append(tmp_feat, norm.rvs(loc=mu0, scale=sigma0, size=int(sizeB * percent)))
        np.random.shuffle(tmp_feat)
        setB.append(tmp_feat)
    setB = np.array(setB)
    zeros = np.zeros([1, setB.shape[1]])
    setB = np.transpose(np.append(setB, zeros, axis=0))

    print("Set1 shape:", setA.shape)
    print("Set2 shape:", setB.shape)

    npout = np.vstack([setA, setB])
    npout = np.concatenate((npout, percentage_A_Expected * np.ones([npout.shape[0], 1])), axis=1)
    return npout
def simulate_normal_model(means, serrs, count, taus=None, do_thetas=False):
    # Check for any zero-error (delta) observations and defer to them
    for ii in range(len(means)):
        if serrs[ii] == 0:
            if do_thetas:
                results = np.zeros((count, 2 + len(means)))
                results[:, 0] = 0
                results[:, 1:] = means[ii]
            else:
                results = np.zeros((count, 2))
                results[:, 0] = 0
                results[:, 1] = means[ii]
            return results

    means = np.array(means, dtype=np.float_)
    varis = np.square(np.array(serrs, dtype=np.float_))
    if taus is None:
        taus = np.linspace(0, 2 * max(serrs), 100)

    p_tau = np.array([p_tau_given_y(tau, means, varis) for tau in taus])
    F_tau = np.cumsum(p_tau)
    F_tau = F_tau / F_tau[-1]

    if do_thetas:
        results = np.zeros((count, 2 + len(means)))
    else:
        results = np.zeros((count, 2))

    rands = get_random(taus, F_tau, count)
    for ii in range(count):
        tau = rands[ii]
        (vari_tau_sqrs, v_mu, mu_hat) = helper_params(means, varis, tau)
        if np.isnan(mu_hat):
            mu = np.nan
        else:
            mu = norm.rvs(size=1, loc=mu_hat, scale=math.sqrt(v_mu))
        results[ii, 0] = tau
        results[ii, 1] = mu
        if do_thetas:
            if tau == 0:
                vs = np.zeros((1, varis.size))
                theta_hats = mu * np.ones((1, varis.size))
            else:
                tau_sqr = tau * tau
                denoms = 1.0 / varis + 1.0 / tau_sqr
                vs = 1.0 / denoms
                theta_hats = (means / varis + mu / tau_sqr) / denoms
            # vs holds variances; norm.rvs expects a standard deviation
            thetas = norm.rvs(loc=theta_hats, scale=np.sqrt(vs))
            results[ii, 2:] = thetas
    return results
def test():
    from scipy.stats import norm
    rvs = np.append(norm.rvs(loc=2, scale=1, size=(200, 1)),
                    norm.rvs(loc=1, scale=3, size=(200, 1)),
                    axis=1).T
    scatter_kde(rvs[0, :], rvs[1, :])
    pl.show()
def normalDisPrior(fileName, avgBurstTime, procNumber, priorNum):
    normalDisList = norm.rvs(avgBurstTime, avgBurstTime / 6, procNumber)
    priorList = norm.rvs(priorNum, 9 / 6, procNumber)
    with open(fileName, 'a') as f:
        for i, j in zip(normalDisList, priorList):
            n = int(i)
            r = random.randint(0, 69)
            p = int(j)
            f.write(str(n) + ' ' + str(r) + ' ' + str(p) + '\n')
def updateSpare(spare_list, spare_amount):
    '''
    Updates the spare list of gaussian distributed random variables
    when they are used.
    '''
    del spare_list
    mov_spare_x = norm.rvs(size=spare_amount).tolist()
    mov_spare_y = norm.rvs(size=spare_amount).tolist()
    spare_list = [mov_spare_x, mov_spare_y]
    return spare_list
def get_normal_example(sample_count):
    loc = 1.0
    scale = 2.0
    samples0 = norm.rvs(loc, scale, sample_count)
    samples1 = norm.rvs(loc, scale, sample_count)
    scores0 = norm.logpdf(samples0, loc, scale)
    scores1 = norm.logpdf(samples1, loc, scale)
    samples = numpy.array(list(zip(samples0, samples1)))
    scores = scores0 + scores1
    return {'name': 'normal', 'samples': samples, 'scores': scores}
def sim_data(self, K, N):
    """
    Draws K stationary time series of length N from the Vasicek model
    and returns them as a K x N array.
    """
    X = np.zeros((K, N))
    X[:, 0] = norm.rvs(size=K, loc=self.stat_mean, scale=self.stat_sd)
    for t in range(1, N):
        X[:, t] = self.beta + self.alpha * X[:, t-1] + self.s * norm.rvs(size=K)
    return X
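# As a standalone illustration of the recursion above, a minimal AR(1)/Vasicek-style
# sketch with the stationary mean and standard deviation written out explicitly.
# The parameter values are illustrative, not taken from the class above.
import numpy as np
from scipy.stats import norm

def sim_ar1(K, N, alpha=0.9, beta=0.5, s=0.1, seed=0):
    """Draw K stationary AR(1) paths: X_t = beta + alpha * X_{t-1} + s * Z_t."""
    np.random.seed(seed)
    stat_mean = beta / (1 - alpha)        # stationary mean
    stat_sd = s / np.sqrt(1 - alpha**2)   # stationary standard deviation
    X = np.zeros((K, N))
    X[:, 0] = norm.rvs(size=K, loc=stat_mean, scale=stat_sd)
    for t in range(1, N):
        X[:, t] = beta + alpha * X[:, t-1] + s * norm.rvs(size=K)
    return X

paths = sim_ar1(K=5, N=2000)
print(paths.mean(), paths.std())   # should be close to 5.0 and ~0.23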
def get_amount(amount_min):
    amount = 0
    while amount < amount_min:
        rv = random.uniform(0, 1)
        if rv < 0.65:
            amount = int(norm.rvs(loc=100, scale=35)) * 1000
        elif rv < 0.95:
            amount = int(norm.rvs(loc=285, scale=50)) * 1000
        else:
            amount = int(norm.rvs(loc=450, scale=7)) * 1000
    return amount
def ts(self, n):
    Z = norm.rvs(size=n)
    W = norm.rvs(size=n)
    X = np.empty(n)
    s = np.empty(n)  # Holds log of s
    s[0] = self.s0
    X[0] = self.beta / (1 - self.alpha)
    for t in range(1, n):
        s[t] = self.b * (s[t-1]**(1 - self.rho)) * np.exp(self.gamma * W[t])
        X[t] = self.beta + self.alpha * X[t-1] + s[t-1] * Z[t]
    return X
def get_amount(amount_min):
    amount = 0
    while amount < amount_min or amount > amount_max:
        rv = random.uniform(0, 1)
        if rv < 0.3:
            amount = amount_max - int(norm.rvs(loc=120, scale=30)) * 1000
        elif rv < 0.55:
            amount = amount_max - int(chi2.rvs(5, loc=380, scale=145)) * 1000
        else:
            amount = amount_max - int(norm.rvs(loc=660, scale=145)) * 1000
    return amount
def generate(n, mu, sigma, gap, c_min, c_max, distribution="lognormal"): r=100 #readlength if distribution == 'normal': samples = norm.rvs(loc=mu, scale=sigma, size=2*n) elif distribution == 'lognormal': logsample = norm.rvs(loc=mu, scale=sigma, size=max(1,int(gap)/100)*n) samples = np.exp(logsample) else: print("Specify normal, lognormal or do not set this argument.") return None min_sample = min(samples) max_sample = float(max(samples)) mean_samples = sum(samples)/len(samples) # print 'Mean all observations:', mean_samples std_dev_samples = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_samples + mean_samples ** 2), samples))) / (len(samples) - 1)) ** 0.5 # print 'STDDEV all samples:', std_dev_samples #observations_over_gap = [ int(round(max(s-gap,0),0)) for s in samples] #observations_over_gap = filter(lambda x: x>0, observations_over_gap) #print sum(observations_over_gap)/len(observations_over_gap) samples_kept = [] for s in samples: if s > c_min + c_max + gap or s < gap or s < -gap or c_min <= -gap: continue p = random.uniform(0,1) # print 'lol',max_sample, gap, (s-gap-2*r) / max(0,(max_sample-gap-2*r)) if p < (s-gap-2*r) / max(0,(max_sample-gap-2*r)): samples_kept.append(s) # print len(samples_kept) if len(samples_kept) <= 1: return [] # print "gap:", gap mean_samples = sum(samples_kept)/len(samples_kept) # print 'Mean conditional fragment size:', mean_samples std_dev_samples = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_samples + mean_samples ** 2), samples_kept))) / (len(samples_kept) - 1)) ** 0.5 # print 'STDDEV conditional fragment size:', std_dev_samples observations_over_gap = [ int(round(max(s-gap,0),0)) if gap > 0 else int(round(max(s - gap,0),0)) for s in samples_kept] observations_kept = filter(lambda x: x>0, observations_over_gap) mean_obs = sum(observations_kept)/len(observations_kept) # print 'Mean conditional observed size:', mean_obs std_dev_obs = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_obs + mean_obs ** 2), observations_kept))) / (len(observations_kept) - 1)) ** 0.5 # print 'STDDEV conditional observed size:', std_dev_obs # print # print return observations_kept
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))

    # simulate the chi2 distribution
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j] ** 2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))

    # simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution: ratio of scaled chi-square variates
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()
def get_period(min, max):
    # period = random.randint(min, max)
    period = 0
    while period < min or period > max:
        rv = random.uniform(0, 1)
        if rv < 0.45:
            period = int(norm.rvs(loc=12, scale=2))
        elif rv < 0.8:
            period = int(norm.rvs(loc=6, scale=1))
        else:
            period = int(norm.rvs(loc=14, scale=4))
    return period
def update(self):
    system_dict = {s.get_data()['name']: s for s in self.systems}
    self.data['core_power'] *= (2 ** (1 / system_dict['reac'].double_time()))
    power = self.data['core_power']
    src1_accuracy = self.data['src1_accuracy']
    src2_accuracy = self.data['src2_accuracy']
    irc1_accuracy = self.data['irc1_accuracy']
    irc2_accuracy = self.data['irc2_accuracy']
    self.data['irc1'] = power / self.data['irc1a2fp'] * (1 + norm_module.rvs(0, src1_accuracy, 1)[0])
    self.data['irc2'] = power / self.data['irc2a2fp'] * (1 + norm_module.rvs(0, src2_accuracy, 1)[0])
    self.data['src1'] = power / self.data['src1cps2fp'] * (1 + norm_module.rvs(0, irc1_accuracy, 1)[0])
    self.data['src2'] = power / self.data['src2cps2fp'] * (1 + norm_module.rvs(0, irc2_accuracy, 1)[0])
def reduce_and_save(filename, add_noise=False, rms_noise=0.001, output_path="", cube_output=None, nsig=3, slicewise_noise=True): ''' Load the cube in and derive the property arrays. ''' if add_noise: if rms_noise is None: raise TypeError("Must specify value of rms noise.") cube, hdr = getdata(filename, header=True) # Optionally scale noise by 1/10th of the 98th percentile in the cube if rms_noise == 'scaled': rms_noise = 0.1*np.percentile(cube[np.isfinite(cube)], 98) from scipy.stats import norm if not slicewise_noise: cube += norm.rvs(0.0, rms_noise, cube.shape) else: spec_shape = cube.shape[0] slice_shape = cube.shape[1:] for i in range(spec_shape): cube[i, :, :] += norm.rvs(0.0, rms_noise, slice_shape) sc = SpectralCube(data=cube, wcs=WCS(hdr)) mask = LazyMask(np.isfinite, sc) sc = sc.with_mask(mask) else: sc = filename reduc = Mask_and_Moments(sc, scale=rms_noise) reduc.make_mask(mask=reduc.cube > nsig * reduc.scale) reduc.make_moments() reduc.make_moment_errors() # Remove .fits from filename save_name = filename.split("/")[-1][:-4] reduc.to_fits(output_path+save_name) # Save the noisy cube too if add_noise: if cube_output is None: reduc.cube.hdu.writeto(output_path+save_name) else: reduc.cube.hdu.writeto(cube_output+save_name)
def kernelDensity():
    # imports needed for a standalone run
    from numpy import hstack, linspace
    from scipy.stats import norm, gaussian_kde
    from matplotlib.pyplot import plot, hist, show

    # creating data with two peaks
    sampD1 = norm.rvs(loc=-1.0, scale=1, size=300)
    sampD2 = norm.rvs(loc=2.0, scale=0.5, size=300)
    samp = hstack([sampD1, sampD2])

    # obtaining the pdf (my_pdf is a function!)
    my_pdf = gaussian_kde(samp)

    # plotting the result
    x = linspace(-5, 5, 100)
    plot(x, my_pdf(x), 'r')  # distribution function
    hist(samp, density=True, alpha=.3)  # histogram
    show()
def rprior(size, hyperparameters):
    """ returns untransformed parameters """
    mu = norm.rvs(size=size, loc=hyperparameters["mu_mean"], scale=hyperparameters["mu_sd"])
    beta = norm.rvs(size=size, loc=hyperparameters["beta_mean"], scale=hyperparameters["beta_sd"])
    xi = random.exponential(scale=1 / hyperparameters["xi_rate"], size=size)
    omega2 = random.exponential(scale=1 / hyperparameters["omega2_rate"], size=size)
    lamb = random.exponential(scale=1 / hyperparameters["lambda_rate"], size=size)
    parameters = zeros((5, size))
    parameters[0, :] = mu
    parameters[1, :] = beta
    parameters[2, :] = xi
    parameters[3, :] = omega2
    parameters[4, :] = lamb
    return parameters
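# A hedged usage sketch for the prior sampler above, assuming `rprior` is in scope
# and that the bare `random` and `zeros` names it uses refer to numpy.random and
# numpy.zeros. The hyperparameter values are purely illustrative.
from numpy import random, zeros
from scipy.stats import norm

hyperparameters = {
    "mu_mean": 0.0, "mu_sd": 1.0,
    "beta_mean": 0.0, "beta_sd": 1.0,
    "xi_rate": 2.0, "omega2_rate": 2.0, "lambda_rate": 1.0,
}

draws = rprior(size=1000, hyperparameters=hyperparameters)
print(draws.shape)          # (5, 1000): rows are mu, beta, xi, omega2, lambda
print(draws.mean(axis=1))   # rough sanity check of the prior means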
def multiprocessing_deconvolution(argument_list): negative_control_scores, sgRNA_indices, perturbation_profile, gamma_list, simulations_n, replicates, guideindices2bin, averaging_method, scale, rescaled_sgRNA_indices_w_obs, groups, maximum_distance = argument_list # # Iterate through n simulations # beta_distributions = {} # for n in range(1, simulations_n + 1): # if n%100 == 0: # logger.info('Simulation %s out of %s ...' % (str(n), str(simulations_n))) replicate_store = {} for r in range(replicates): if negative_control_scores[0] == 'gaussian': if scale > 1: rescaled_observations = [] for scaled_index in rescaled_sgRNA_indices_w_obs: rescaled_observations.append(np.mean(norm.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(guideindices2bin[scaled_index])))) else: rescaled_observations = norm.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(rescaled_sgRNA_indices_w_obs)) elif negative_control_scores[0] == 'laplace': if scale > 1: rescaled_observations = [] for scaled_index in rescaled_sgRNA_indices_w_obs: rescaled_observations.append(np.mean(laplace.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(guideindices2bin[scaled_index])))) else: rescaled_observations = laplace.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(rescaled_sgRNA_indices_w_obs)) elif negative_control_scores[0] == 'negative_control_guides': if scale > 1: rescaled_observations = [] for scaled_index in rescaled_sgRNA_indices_w_obs: rescaled_observations.append(np.mean(np.random.choice(negative_control_scores[1][r], len(guideindices2bin[scaled_index]), replace = True))) else: rescaled_observations = np.random.choice(negative_control_scores[1][r], len(rescaled_sgRNA_indices_w_obs), replace = True) # Set up regularized deconvolution optimization problem df = pd.DataFrame({'pos':rescaled_sgRNA_indices_w_obs, 'lfc':rescaled_observations, 'group':groups}) genomic_coordinates = [] gammas2betas = {} delete_gammas = [] # Iterate through groups and perform deconvolution for group in df.group.unique(): # Filtered dataframe to separate individual groups dff = df[df.group == group] # Make sure >1 sgRNA exists per group # if len(dff.index) > 1: # Assign relevant variables for optimization problem y = dff.lfc.tolist() # y = np.array(y).reshape(len(y), 1) betas = Variable(len(np.arange(dff.pos.tolist()[0], dff.pos.tolist()[-1], scale).tolist()) + maximum_distance) x_shift = [int(maximum_distance + (x - dff.pos.tolist()[0])/int(scale)) for x in dff.pos.tolist()] gamma = Parameter(sign = "positive") # gamma = Parameter(nonneg = True) genomic_coordinates += np.arange(int(dff.pos.tolist()[0]), int(dff.pos.tolist()[-1]) + scale, scale).tolist() # Formulate optimization problem objective = Minimize(0.5*sum_squares(y - conv(perturbation_profile, betas)[x_shift]) + gamma*sum_entries(abs(diff(betas)))) # objective = Minimize(0.5*sum_squares(y - conv(perturbation_profile, betas)[x_shift]) + gamma*tv(betas)) p = Problem(objective) # Solve for varying lambdas for g in gamma_list: # Make sure solver converges, otherwise delete gammas that fail try: if g not in gammas2betas: gammas2betas[g] = [] gamma.value = g result = p.solve() gammas2betas[g] += np.array(betas.value).reshape(-1).tolist()[int(maximum_distance/2):-int(maximum_distance/2)] except: delete_gammas.append(g) continue # Delete gammas that failed to converge for g in delete_gammas: del gammas2betas[g] 
gammas2betas['indices'] = genomic_coordinates # Add to replicate store replicate_store[r] = gammas2betas[gamma_list[0]] # Create combined deconvolved signals from replicates for simulation deconvolved_signal = {} for i in replicate_store.keys(): for j in range(len(replicate_store[i])): if j not in deconvolved_signal: deconvolved_signal[j] = [] deconvolved_signal[j].append(replicate_store[i][j]) # Create mean or median profile if averaging_method == 'mean': combine_simulations = [np.mean(deconvolved_signal[x]) for x in deconvolved_signal] elif averaging_method == 'median': combine_simulations = [np.median(deconvolved_signal[x]) for x in deconvolved_signal] # for i in range(len(combine_simulations)): # try: # beta_distributions[i].append(combine_simulations[i]) # except: # beta_distributions[i] = [combine_simulations[i]] return combine_simulations
def sample_mixed(self, pis, mus, sigmas, j, size=1):
    choice = np.random.choice(np.arange(0, pis.shape[1]), p=pis[j])
    return norm.rvs(size=size, loc=mus[j][choice], scale=sigmas[j][choice])
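# For context, a self-contained sketch of the same two-step mixture draw: pick a
# component by its weight, then sample from that component's Gaussian. The mixture
# parameters below are made up for illustration.
import numpy as np
from scipy.stats import norm

def sample_gaussian_mixture(pis, mus, sigmas, size=1, seed=0):
    """Draw `size` samples from a 1-D Gaussian mixture."""
    rng = np.random.default_rng(seed)
    components = rng.choice(len(pis), size=size, p=pis)   # pick components by weight
    return norm.rvs(loc=np.asarray(mus)[components],
                    scale=np.asarray(sigmas)[components],
                    size=size, random_state=seed)

# Example: a 70/30 mixture of N(0, 1) and N(5, 0.5)
samples = sample_gaussian_mixture([0.7, 0.3], [0.0, 5.0], [1.0, 0.5], size=10000)
print(samples.mean())   # roughly 0.7*0 + 0.3*5 = 1.5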
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 19 17:42:04 2019

@author: flori
"""

from pandas import Series, DataFrame
import pandas as pd
import numpy as np

methodeA = Series([79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97,
                   80.05, 80.03, 80.02, 80.00, 80.02])
print(methodeA.mean())
print(methodeA.std())

##########################

from scipy.stats import norm

np.random.seed(1)
methodeA_sim1 = Series(np.round(norm.rvs(size=6, loc=80, scale=0.02), 2))
methodeA_sim1
methodeA_sim1.mean()
methodeA_sim1.std()
def normal_rvs(mu, sigma=1, random_state=None):
    return norm.rvs(loc=mu, scale=sigma, random_state=random_state)
# 1 simulate characteristics Cij,t
data_low = 0.9
data_scale = 0.1
data_size = pc1
pj = uniform.rvs(loc=data_low, scale=data_scale, size=data_size)

data_mean = 0
data_std = 1
data_size = id_num
# epsilon_ij_t = norm.rvs(loc=data_mean, scale=data_std, size=data_size)

c = np.zeros(shape=(id_num * T_num, pc1))
for j in range(pc1):
    c[0:200, j] = norm.rvs(loc=data_mean, scale=data_std, size=data_size)
    for t in range(1, T_num):
        c[200 * t:200 * (t + 1), j] = c[200 * (t - 1):200 * t, j] * pj[j] + norm.rvs(
            loc=data_mean, scale=data_std, size=data_size) * np.sqrt(1 - pj[j]**2)

c_rank = np.zeros(shape=(id_num * T_num, pc1))
# rank over cross-section
for j in range(pc1):
    temp_series = pd.Series(c[:, j])
    temp_series = temp_series.rank()
    temp_series = 2 * temp_series / (len(temp_series) + 1) - 1
    c_rank[:, j] = temp_series.copy()
def resample(self, aj, ai, params):
    if isinstance(params, list):
        mu, kappa, theta, sigma, nu, eta, lda, omega = self._unwrap_params(params)
    else:
        mu, kappa, theta, sigma, nu, eta, lda, omega = self._unwrap_param_states(params)
    neg_idxs = np.where(aj < 0)[0]
    for i in neg_idxs:
        while aj[i] < 0:
            aj[i] = ai[i] + kappa[i] * (eta[i] - ai[i]) * self.dt + lda[i] * np.sqrt(ai[i] * self.dt) * norm.rvs()
    return aj
def observation_predict(self, x_pred, particles, y_prev, mu):
    y_hat = y_prev + (mu - 1/2 * x_pred) * self.dt + np.sqrt(particles * self.dt) * norm.rvs()
    py_hat = np.array([np.mean(self.prediction_density(y_hat[k], y_prev, x_pred, mu))
                       for k in range(len(y_hat))])
    py_hat = py_hat / sum(py_hat)
    return np.sum(py_hat * y_hat)
def moveBrownian(self):
    self.x += norm.rvs(scale=self.T)
    self.y += norm.rvs(scale=self.T)
def filter(self, params, is_bounds=True, simple_resample=False, predict_obs=False): """ Performs sequential monte-carlo sampling particle filtering Note: Currently only supports a bound of parameters """ y = self.y N = self.N if not is_bounds: # params is an array of param values, not particles mu, kappa, theta, sigma, rho, v0 = self._unwrap_params(params) else: # initialize param states, N particles for each param sampled uniformly v0 = params[-1] # params is shape [(lb, ub)_1,...,k, v0] params_states = self._init_parameter_states(N, params[:-1]) observations = np.zeros(len(y)) hidden = np.zeros(len(y)) observations[0] = y[0] hidden[0] = v0 # particles = np.maximum(1e-3, self.proposal_sample(self.N, v, dy, params)) weights = np.array([1/self.N] * self.N) # initialize v particles particles = norm.rvs(v0, 0.02, N) particles = np.maximum(1e-4, particles) # storing the estimated parameters each step params_steps = np.zeros((len(params)-1, len(y))) params_steps.transpose()[0] = np.mean(params_states, axis=1) for i in range(1, len(y)): dy = y[i] - y[i-1] # prediction # proposal sample x_pred = self.proposal_sample(N, particles, dy, params_states) x_pred = np.maximum(1e-3, x_pred) # weights Li = self.likelihood(y[i], x_pred, particles, y[i-1], params_states) I = self.proposal(x_pred, particles, dy, params_states) T = self.transition(x_pred, particles, params_states) weights = weights * (Li*T/I) weights = weights/sum(weights) # Resampling if self._neff(weights) < 0.7*self.N: print('resampling since: {}'.format(self._neff(weights))) if simple_resample: x_pred, weights, params_states = self._simple_resample(x_pred, weights, params_states) else: x_pred, weights, params_states = self._systematic_resample(x_pred, weights, params_states) # observation prediction if predict_obs: y_hat = self.observation_predict(x_pred, particles, y[i-1], np.mean(params_states[0])) # mu is the 0 index observations[i] = y_hat print("Done with iter: {}".format(i)) hidden[i] = np.sum(x_pred * weights) particles = x_pred params_steps.transpose()[i] = np.sum(np.multiply(params_states, weights[np.newaxis, :]), axis=1) return (hidden, params_steps, observations) if predict_obs else (hidden, params_steps)
def noise(self, relpos):
    ell = norm.rvs(loc=relpos[0], scale=relpos[0] * self.distance_noise_rate)
    phi = norm.rvs(loc=relpos[1], scale=self.direction_noise)
    return np.array([ell, phi]).T
def dgv(mu, nu, minv=1, maxv=20):
    "Discrete Gaussian variate"
    rv = round(norm.rvs(mu, nu))
    return min(max(rv, minv), maxv)
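# A quick usage sketch for the clipped discrete Gaussian above, assuming `dgv`
# is in scope; the arguments are illustrative.
from scipy.stats import norm

# Draw a few rounded Gaussian variates centred on 10 with spread 3;
# every value is clamped to the default range [1, 20].
values = [dgv(mu=10, nu=3) for _ in range(5)]
print(values)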
def test_river_discharge_simulation(): # Modules activation and deactivation # analysis = False # cdf_pdf_representation = False # temporal_dependency = False # climatic_events_fitting = True # threshold_checking_for_simulation = False # simulation_cycles = True analysis = True cdf_pdf_representation = False temporal_dependency = False climatic_events_fitting = True threshold_checking_for_simulation = False simulation_cycles = True #%% Input data # Initial year, number of years, number of valid data in a year anocomienzo, duracion, umbralano = (2018, 10, 0.8) # Type of fit (0-GUI, 1-stationary, 2-nonstationary) ant = [2] # Fourier order for nonstationary analysis no_ord_cycles = [2] no_ord_calms = [2] # Number of simulations no_sim = 1 # Type of fit functions fun_cycles = [st.exponweib] fun_calms = [st.norm] # Number of normals no_norm_cycles = [False] no_norm_calms = [False] f_mix_cycles = [False] mod_cycles = [[0, 0, 0, 0]] # Cycles River discharge threshold_cycles = 25 # minimum_interarrival_time = pd.Timedelta('250 days') # minimum_cycle_length = pd.Timedelta('5 days') minimum_interarrival_time = pd.Timedelta('7 days') minimum_cycle_length = pd.Timedelta('2 days') # Cycles SPEI threshold_spei = 0 minimum_interarrival_time_spei = pd.Timedelta('150 days') minimum_cycle_length_spei = pd.Timedelta('150 days') interpolation = True interpolation_method = 'linear' interpolation_freq = '1min' truncate = True extra_info = True #%% Read data # Import river discharge data when all dams were active data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter', 'tests', 'output', 'modf') modf_file_name = 'guadalete_estuary_river_discharge.modf' path_name = os.path.join(data_path, modf_file_name) modf_rd = MetOceanDF.read_file(path_name) # Group into dataframe river_discharge = pd.DataFrame(modf_rd) # Delete rows where with no common values river_discharge.dropna(how='any', inplace=True) # Import complete rive discharge historic data # All historic river discharge data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') modf_file_name = 'caudales.txt' path_name = os.path.join(data_path, modf_file_name) modf_all = pd.read_table(path_name, header=None, delim_whitespace=True) date_col = dates.extract_date(modf_all.iloc[:, 0:4]) modf_all.index = date_col modf_all.drop(modf_all.columns[0:4], axis=1, inplace=True) modf_all.columns = ['Q'] #%% Preprocessing t_step = missing_values.find_timestep(river_discharge) # Find tstep data_gaps = missing_values.find_missing_values(river_discharge, t_step) river_discharge = missing_values.fill_missing_values( river_discharge, t_step, technique='interpolation', method='nearest', limit=16 * 24, limit_direction='both') data_gaps_after = missing_values.find_missing_values( river_discharge, t_step) # Add noise for VAR noise = np.random.rand(river_discharge.shape[0], river_discharge.shape[1]) * 1e-2 river_discharge = river_discharge + noise # Save_to_pickle river_discharge.to_pickle('river_discharge.p') # Group into list of dataframes df = list() df.append(pd.DataFrame(river_discharge['Q'])) #%% Cycles and calms calculation cycles, calm_periods, info = extremal.extreme_events( river_discharge, 'Q', threshold_cycles, minimum_interarrival_time, minimum_cycle_length, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) # Calculate duration of the cycles dur_cycles = extremal.events_duration(cycles) dur_cycles_description = dur_cycles.describe() sample_cycles = 
pd.DataFrame(info['data_cycles'].iloc[:, 0]) noise = np.random.rand(sample_cycles.shape[0], sample_cycles.shape[1]) * 1e-2 sample_cycles = sample_cycles + noise sample_calms = pd.DataFrame(info['data_calm_periods']) noise = np.random.rand(sample_calms.shape[0], sample_calms.shape[1]) * 1e-2 sample_calms = sample_calms + noise #%% CLIMATIC INDICES # Sunspots data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') modf_file_name = 'sunspot.csv' path_name = os.path.join(data_path, modf_file_name) sunspot = pd.read_csv(path_name, header=None, delim_whitespace=True, parse_dates=[[0, 1]], index_col=0) sunspot = sunspot.drop([2, 4, 5], axis=1) # SPEI data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') modf_file_name = 'spei_cadiz.csv' path_name = os.path.join(data_path, modf_file_name) spei = pd.read_csv(path_name, sep=',') spei.index = sunspot.index[2412:3233] # Calculate cycles over SPEI spei = pd.DataFrame(spei.loc[:, 'SPEI_12'] * 100).dropna() cycles_spei, calm_periods_spei, info_spei = extremal.extreme_events( spei, 'SPEI_12', threshold_spei, minimum_interarrival_time_spei, minimum_cycle_length_spei, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) peaks_over_thres_spei = extremal.events_max(cycles_spei) # Plot peaks peaks_over_thres = extremal.events_max(cycles) # Represent cycles fig1 = plt.figure(figsize=(20, 20)) ax = plt.axes() ax.plot(river_discharge) ax.axhline(threshold_cycles, color='lightgray') ax.plot(spei.loc[:, 'SPEI_12'] * 100, color='0.75', linewidth=2) # Plot cycles # for cycle in cycles_all: # ax.plot(cycle, 'sandybrown', marker='.', markersize=5) # # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) # # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) for cycle in cycles: ax.plot(cycle, 'g', marker='.', markersize=5) # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) for cycle in cycles_spei: ax.plot(cycle, 'k', marker='.', markersize=5, linewidth=2) ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=15) ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=15) ax.plot(peaks_over_thres, '.r', markersize=15) ax.plot(peaks_over_thres_spei, '.c', markersize=15) ax.grid() ax.set_xlim([datetime.date(1970, 01, 01), datetime.date(2018, 04, 11)]) ax.set_ylim([-5, 500]) fig1.savefig( os.path.join('output', 'analisis', 'graficas', 'ciclos_river_discharge_spei.png')) #%% # ANALISIS CLIMATICO (0: PARA SALTARLO, 1: PARA HACERLO; LO MISMO PARA TODOS ESTOS IF) if analysis: if cdf_pdf_representation: for i in range(len(df)): # DIBUJO LAS CDF Y PDF DE LOS REGISTROS plot_analisis.cdf_pdf_registro(df[i], df[i].columns[0]) plt.pause(0.5) #%% THEORETICAL FIT CYCLES data_cycles = sample_cycles['Q'] # Empirical cdf ecdf = empirical_distributions.ecdf_histogram(data_cycles) # Fit the variable to an extremal distribution (param, x, cdf_expwbl, pdf_expwbl) = theoretical_fit.fit_distribution( data_cycles, fit_type=fun_cycles[0].name, x_min=min(data_cycles), x_max=2 * max(data_cycles), n_points=1000) par0_cycles = list() par0_cycles.append(np.asarray(param)) # GUARDO LOS PARAMETROS np.save( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy'), par0_cycles) # Check the goodness of the fit fig1 = plt.figure(figsize=(20, 20)) ax = plt.axes() ax.plot(ecdf.index, ecdf, '.') ax.plot(x, 
cdf_expwbl) ax.set_xlabel('Q (m3/s)') ax.set_ylabel('CDF') ax.legend([ 'ECDF', 'Exponweib Fit', ]) ax.grid() ax.set_xlim([0, 500]) fig1.savefig( os.path.join('output', 'analisis', 'graficas', 'cdf_fit_ciclos_river_discharge.png')) # PP - Plot values (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf_expwbl, ecdf) # QQ - Plot values (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf_expwbl, ecdf) # Plot Goodness of fit theoretical_fit.plot_goodness_of_fit(cdf_expwbl, ecdf, river_discharge, 'Q', x, yppplot_emp, yqqplot_emp, yppplot_teo, yqqplot_teo) # Non-stationary fit for calms par_cycles, mod_cycles, f_mix_cycles, data_graph_cycles = list(), list( ), list(), list() df = list() df.append(data_cycles) for i in range(len(df)): # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO analisis_ = analisis.analisis(df[i], fun_cycles[i], ant[i], ordg=no_ord_cycles[i], nnorm=no_norm_cycles[i], par0=par0_cycles[i]) par_cycles.append(analisis_[0]) mod_cycles.append(analisis_[1]) f_mix_cycles.append(analisis_[2]) aux = list(analisis_[3]) aux[5] = i aux = tuple(aux) data_graph_cycles.append(aux) # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL) plot_analisis.cuantiles_ne(*data_graph_cycles[i]) plt.pause(0.5) fig2 = plt.figure(figsize=(20, 20)) plt.plot(x, pdf_expwbl) _ = plt.hist(data_cycles, bins=np.linspace(0, 500, 100), normed=True, alpha=0.5) plt.xlim([0, 400]) fig2.savefig( os.path.join('output', 'analisis', 'graficas', 'pdf_fit_ciclos_river_discharge.png')) # %% THEORETICAL FIT CALMS param0_calms = list() data_calms = sample_calms['Q'] (param, x, cdf, pdf) = theoretical_fit.fit_distribution( data_calms, fit_type=fun_calms[0].name, x_min=np.min(data_calms), x_max=1.1 * np.max(data_calms), n_points=1000) param0_calms.append(np.asarray(param)) # Empirical cdf ecdf = empirical_distributions.ecdf_histogram(data_calms) epdf = empirical_distributions.epdf_histogram(data_calms, bins=0) # PP - Plot values (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf, ecdf) # QQ - Plot values (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf, ecdf) # Plot Goodness of fit theoretical_fit.plot_goodness_of_fit(cdf, ecdf, sample_calms, 'Q', x, yppplot_emp, yqqplot_emp, yppplot_teo, yqqplot_teo) # Non-stationary fit for calms par_calms, mod_calms, f_mix_calms, data_graph_calms = list(), list( ), list(), list() df = list() df.append(data_calms) for i in range(len(df)): # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO analisis_ = analisis.analisis(df[i], fun_calms[i], ant[i], ordg=no_ord_calms[i], nnorm=no_norm_calms[i], par0=param0_calms[i]) par_calms.append(analisis_[0]) mod_calms.append(analisis_[1]) f_mix_calms.append(analisis_[2]) data_graph_calms.append(analisis_[3]) # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL) plot_analisis.cuantiles_ne(*data_graph_calms[i]) plt.pause(0.5) # Guardo parametros np.save( os.path.join('output', 'analisis', 'parameter_river_discharge_calms.npy'), par_calms) np.save( os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy'), mod_calms) np.save( os.path.join('output', 'analisis', 'f_mix_river_discharge_calms.npy'), f_mix_calms) #%% TEMPORAL DEPENDENCY if temporal_dependency: # SE UTILIZAN LOS PARAMETROS DE SALIDA DEL ANÁLISIS PREVIO # Lectura de datos par_cycles = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy')) par_calms = np.load( os.path.join('output', 'analisis', 
'parameter_river_discharge_calms.npy')) mod_calms = np.load( os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy')) f_mix_calms = np.load( os.path.join('output', 'analisis', 'f_mix_river_discharge_calms.npy')) (df_dt_cycles, cdf_) = analisis.dependencia_temporal(sample_cycles, par_cycles, mod_cycles, no_norm_cycles, f_mix_cycles, fun_cycles) # SE GUARDAN LOS PARAMETROS DEL MODELO VAR df_dt_cycles.to_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_cycles.p')) (df_dt_calms, cdf_) = analisis.dependencia_temporal(sample_calms, par_calms, mod_calms, no_norm_calms, f_mix_calms, fun_calms) # SE GUARDAN LOS PARAMETROS DEL MODELO VAR df_dt_calms.to_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_calms.p')) if climatic_events_fitting: #%% FIT NUMBER OF EVENTS DURING WET CYCLES events_wet_cycle = pd.Series([5, 2, 1, 3, 2, 2, 0, 6, 1]) ecdf_events_wet_cycle = empirical_distributions.ecdf_histogram( events_wet_cycle) mu = np.mean(events_wet_cycle) simulated_number_events = pd.Series( poisson.rvs(mu, loc=0, size=100, random_state=None)) ecdf_simulated_events_wet_cycle = empirical_distributions.ecdf_histogram( simulated_number_events) x_poisson = np.linspace(0, 10, 100) cdf_poisson = poisson.cdf(x_poisson, mu, loc=0) plt.figure() ax = plt.axes() ax.plot(ecdf_events_wet_cycle.index, ecdf_events_wet_cycle, '.') ax.plot(ecdf_simulated_events_wet_cycle.index, ecdf_simulated_events_wet_cycle, '.') ax.plot(x_poisson, cdf_poisson) ax.legend(['ECDF', 'ECDF Sim', 'Poisson Fit']) ax.grid() #%% FIT TIME BETWEEN WET CYCLES t_wet_cycles = peaks_over_thres_spei.index.to_series().diff().dropna( ).astype('m8[s]').astype(np.float32) ecdf_t_wet_cycle = empirical_distributions.ecdf_histogram(t_wet_cycles) norm_param = norm.fit(t_wet_cycles, loc=0) simulated_t_wet_cycles = pd.Series( norm.rvs(*norm_param, size=100, random_state=None)) ecdf_simulated_t_wet_cycles = empirical_distributions.ecdf_histogram( simulated_t_wet_cycles) x_norm = np.linspace(0, 2 * max(t_wet_cycles), 100) cdf_norm = norm.cdf(x_norm, *norm_param) plt.figure() ax = plt.axes() ax.plot(ecdf_t_wet_cycle.index, ecdf_t_wet_cycle, '.') ax.plot(ecdf_simulated_t_wet_cycles.index, ecdf_simulated_t_wet_cycles, '.') ax.plot(x_norm, cdf_norm) ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit']) ax.grid() simulated_t_wet_cycles_days = simulated_t_wet_cycles.astype('m8[s]') # Elimino valores negativos simulated_t_wet_cycles_days = simulated_t_wet_cycles_days[ simulated_t_wet_cycles_days.values > datetime.timedelta(days=1)] #%% FIT TIME BETWEEN EVENTS DURING WET CYCLES t_between_events = peaks_over_thres.index.to_series().diff().dropna() t_between_events = t_between_events[ t_between_events < datetime.timedelta(days=400)] t_between_events = t_between_events.astype('m8[s]').astype(np.float32) ecdf_t_between_events = empirical_distributions.ecdf_histogram( t_between_events) lambda_par = expon.fit(t_between_events, loc=0) simulated_t_between_events = pd.Series( expon.rvs(scale=lambda_par[1], size=100, random_state=None)) ecdf_simulated_t_between_events = empirical_distributions.ecdf_histogram( simulated_t_between_events) x_expon = np.linspace(0, 2 * max(t_between_events), 100) cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0) plt.figure() ax = plt.axes() ax.plot(ecdf_t_between_events.index, ecdf_t_between_events, '.') ax.plot(ecdf_simulated_t_between_events.index, ecdf_simulated_t_between_events, '.') ax.plot(x_expon, cdf_expon) ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit']) ax.grid() 
simulated_t_between_events_days = simulated_t_between_events.astype( 'm8[s]') #%% FIT TIME BETWEEN ALL EVENTS # Fit time between events (without considering wet cycles) 2 method t_between_events_2method = peaks_over_thres.index.to_series().diff( ).dropna() t_between_events_2method = t_between_events_2method.astype( 'm8[s]').astype(np.float32) ecdf_t_between_events_2method = empirical_distributions.ecdf_histogram( t_between_events_2method) lambda_par = expon.fit(t_between_events_2method, loc=0) simulated_t_between_events_2method = pd.Series( expon.rvs(scale=lambda_par[1], size=100, random_state=None)) ecdf_simulated_t_between_events_2method = empirical_distributions.ecdf_histogram( simulated_t_between_events_2method) x_expon = np.linspace(0, 2 * np.max(t_between_events_2method), 100) cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0) plt.figure() ax = plt.axes() ax.plot(ecdf_t_between_events_2method.index, ecdf_t_between_events_2method, '.') ax.plot(ecdf_simulated_t_between_events_2method.index, ecdf_simulated_t_between_events_2method, '.') ax.plot(x_expon, cdf_expon) ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit']) ax.grid() simulated_t_between_events_2method_days = simulated_t_between_events.astype( 'm8[s]') # nul_values = simulated_t_between_events_2method_days.values > datetime.timedelta(days=2000) #%% SIMULACION CLIMÁTICA CHEQUEO UMBRAL OPTIMO PARA AJUSTAR DURACIONES if threshold_checking_for_simulation: # CARGO PARÁMETROS par_cycles = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy')) df_dt_cycles = pd.read_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_cycles.p')) vars_ = ['Q'] # Cargo el SPEI Index para ajustar tiempo entre ciclos humedos, numero de eventos por ciclo humedo # tiempo entre eventos dentro de ciclo humedo # Figura de las cdf y pdf empiricas fig1, axes1 = plt.subplots(1, 2, figsize=(20, 7)) cont = 0 iter = 0 while cont < no_sim: df_sim = simulacion.simulacion(anocomienzo, duracion, par_cycles, mod_cycles, no_norm_cycles, f_mix_cycles, fun_cycles, vars_, sample_cycles, df_dt_cycles, [0, 0, 0, 0, 0], semilla=int( np.random.rand(1) * 1e6)) iter += 1 # Primero filtro si hay valores mayores que el umbral,en cuyo caso descarto la serie if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25: # Representacion de la serie plt.figure() ax = plt.axes() ax.plot(df_sim) ax.plot(sample_cycles, '.') ax.plot(df_sim * 0 + max(sample_cycles['Q']), 'r') ax.grid() # Cdf Pdf data = df_sim['Q'] ecdf = empirical_distributions.ecdf_histogram(data) epdf = empirical_distributions.epdf_histogram(data, bins=0) axes1[0].plot(epdf.index, epdf, '--', color='0.75') axes1[1].plot(ecdf.index, ecdf, '--', color='0.75') # Extract cycles from data for different thresholds to fix the duration fig2, axes2 = plt.subplots(1, 2, figsize=(20, 7)) if cont == 0: dur_cycles = dur_cycles.astype('m8[s]').astype( np.float32) # Convierto a segundos y flotante ecdf_dur = empirical_distributions.ecdf_histogram(dur_cycles) epdf_dur = empirical_distributions.epdf_histogram(dur_cycles, bins=0) axes2[0].plot(epdf_dur.index, epdf_dur, 'r', lw=2) axes2[1].plot(ecdf_dur.index, ecdf_dur, 'r', lw=2) threshold = np.arange(20, 110, 10) color_sequence = [ '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5' ] for j, th in enumerate(threshold): minimum_interarrival_time = pd.Timedelta('1 
hour') minimum_cycle_length = pd.Timedelta('2 days') cycles, calm_periods, info = extremal.extreme_events( df_sim, 'Q', th, minimum_interarrival_time, minimum_cycle_length, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) # Calculate duration of the cycles dur_cycles_sim = extremal.events_duration(cycles) dur_cycles_sim_description = dur_cycles_sim.describe() # Represent cycles fig3 = plt.figure(figsize=(20, 20)) ax = plt.axes() ax.plot(df_sim) ax.axhline(th, color='lightgray') ax.grid() ax.legend([ 'Threshold: ' + str(th) + ' (m3/s)' + '/ Dur_min ' + str(dur_cycles_description['min']) + ' - ' + str(dur_cycles_sim_description['min']) + '/ Dur_mean ' + str(dur_cycles_description['mean']) + ' - ' + str(dur_cycles_sim_description['mean']) + '/ Dur_max ' + str(dur_cycles_description['max']) + ' - ' + str(dur_cycles_sim_description['max']) ]) for cycle in cycles: ax.plot(cycle, 'g', marker='.', markersize=5) ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) ax.set_xlim([ datetime.date(2018, 04, 01), datetime.date(2030, 01, 01) ]) ax.set_ylim([0, 600]) fig_name = 'ciclos_sim_' + str(cont) + '_threshold_' + str( th) + '.png' fig3.savefig( os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial', 'umbral_optimo', fig_name)) # Calculate the cdf and pdf of the cycle duration dur_cycles_sim = dur_cycles_sim.astype('m8[s]').astype( np.float32) ecdf_dur_sim = empirical_distributions.ecdf_histogram( dur_cycles_sim) epdf_dur_sim = empirical_distributions.epdf_histogram( dur_cycles_sim, bins=0) axes2[0].plot(epdf_dur_sim.index, epdf_dur_sim, '--', color=color_sequence[j], label=['Threshold: ' + str(threshold[j])]) axes2[1].plot(ecdf_dur_sim.index, ecdf_dur_sim, '--', color=color_sequence[j], label=['Threshold: ' + str(threshold[j])]) axes2[0].legend() axes2[1].set_xlim([0, 5000000]) axes2[0].set_xlim([0, 5000000]) fig_name = 'ciclos_dur_sim_' + str(cont) + '.png' fig2.savefig( os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial', 'umbral_optimo', fig_name)) cont += 1 data = sample_cycles['Q'] ecdf = empirical_distributions.ecdf_histogram(data) epdf = empirical_distributions.epdf_histogram(data, bins=0) axes1[0].plot(epdf.index, epdf, 'r', lw=2) axes1[1].plot(ecdf.index, ecdf, 'r', lw=2) fig_name = 'pdf_cdf_descarga_fluvial.png' fig1.savefig( os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial', 'umbral_optimo', fig_name)) #%% SIMULACION CLIMATICA threshold = 50 minimum_interarrival_time = pd.Timedelta('1 hour') minimum_cycle_length = pd.Timedelta('2 days') if simulation_cycles: # CARGO PARÁMETROS par_cycles = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy')) par_calms = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_calms.npy')) mod_calms = np.load( os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy')) f_mix_calms = np.load( os.path.join('output', 'analisis', 'f_mix_river_discharge_calms.npy')) df_dt_cycles = pd.read_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_cycles.p')) df_dt_calms = pd.read_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_calms.p')) vars_ = ['Q'] # Figura de las cdf y pdf empiricas fig2, axes1 = plt.subplots(1, 2, figsize=(20, 7)) cont = 0 iter = 0 while cont < no_sim: df_sim = simulacion.simulacion(anocomienzo, duracion, par_cycles, mod_cycles, no_norm_cycles, f_mix_cycles, fun_cycles, 
vars_, sample_cycles, df_dt_cycles, [0, 0, 0, 0, 0], semilla=int( np.random.rand(1) * 1e6)) iter += 1 # Primero filtro si hay valores mayores que el umbral,en cuyo caso descarto la serie if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25: df_sim = df_sim.resample('1H').interpolate() # Extract cycles from data for different thresholds to fix the duration if cont == 0: dur_cycles = dur_cycles.astype('m8[s]').astype( np.float32) # Convierto a segundos y flotante # Calculate cycles cycles, calm_periods, info = extremal.extreme_events( df_sim, 'Q', threshold, minimum_interarrival_time, minimum_cycle_length, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) # # Represent cycles # fig3 = plt.figure(figsize=(20, 20)) # ax = plt.axes() # ax.plot(df_sim) # ax.axhline(threshold, color='lightgray') # ax.grid() # # for cycle in cycles: # ax.plot(cycle, 'g', marker='.', markersize=5) # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) # ax.set_xlim([datetime.date(2018, 01, 01), datetime.date(2021, 01, 01)]) # ax.set_ylim([0, 600]) # fig3.savefig(os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial', # 'ciclos_cadiz_simulado_' + str(cont).zfill(4) + '.png')) # Start to construct the time series indices = pd.date_range(start='2018', end='2100', freq='1H') df_simulate = pd.DataFrame(np.zeros((len(indices), 1)) + 25, dtype=float, index=indices, columns=['Q']) # The start is in wet cycles cont_wet_cicles = 0 cont_df_events = 1 t_ini = datetime.datetime(2018, 01, 01) t_end = datetime.datetime(2018, 01, 01) while t_end < datetime.datetime(2090, 01, 01): if cont_wet_cicles != 0: t_ini = t_end + simulated_t_wet_cycles_days[ cont_wet_cicles] year = t_ini.year else: year = 2018 # Select the number of events during wet cycle n_events = simulated_number_events[cont_wet_cicles] - 1 cont_wet_cicles += 1 if n_events != 0: # for j in range(0, n_events): cont_df_events_in_wet_cycles = 0 while cont_df_events_in_wet_cycles <= n_events: if cont_df_events_in_wet_cycles != 0: # Time between events year = year + 1 # Select the event cycle = cycles[cont_df_events] if np.max(cycle) >= 150: # Simulate date month1 = [ random.randint(1, 3), random.randint(10, 12) ] rand_pos = random.randint(0, 1) month = month1[rand_pos] day = random.randint(1, 28) hour = random.randint(0, 23) else: # Simulate date month = random.randint(1, 12) day = random.randint(1, 28) hour = random.randint(0, 23) t_ini = datetime.datetime(year, month, day, hour) pos_ini = np.where( df_simulate.index == t_ini)[0][0] pos_end = pos_ini + cycle.shape[0] # Insert cycle df_simulate.iloc[pos_ini:pos_end, 0] = cycle.values t_end = df_simulate.index[pos_end] year = df_simulate.index[pos_end].to_datetime( ).year cont_df_events += 1 cont_df_events_in_wet_cycles += 1 else: t_end = t_ini # Simulation of calm periods df_sim_calms = simulacion.simulacion( anocomienzo, 85, par_calms, mod_calms, no_norm_calms, f_mix_calms, fun_calms, vars_, sample_calms, df_dt_calms, [0, 0, 0, 0, 0], semilla=int(np.random.rand(1) * 1e6)) # Remove negative values df_sim_calms[df_sim_calms < 0] = np.random.randint(1, 5) # Combine both dataframes with cycles and calms pos_cycles = df_simulate >= 50 df_river_discharge = df_sim_calms df_river_discharge[pos_cycles] = df_simulate # Hourly interpolation df_river_discharge = df_river_discharge.resample( 'H').interpolate() # Representation of results fig1 = plt.figure(figsize=(20, 10)) ax = plt.axes() 
ax.plot(river_discharge) ax.plot(df_river_discharge) ax.legend('Hindcast', 'Forecast') ax.grid() ax.set_ylim([-5, 500]) fig1.savefig( os.path.join( 'output', 'simulacion', 'graficas', 'descarga_fluvial', 'descarga_fluvial_cadiz_simulado_' + str(cont).zfill(4) + '.png')) # Cdf Pdf data = df_river_discharge['Q'] ecdf = empirical_distributions.ecdf_histogram(data) epdf = empirical_distributions.epdf_histogram(data, bins=0) axes1[0].plot(epdf.index, epdf, '--', color='0.75') axes1[1].plot(ecdf.index, ecdf, '--', color='0.75') # Guardado de ficheros df_river_discharge.to_csv(os.path.join( 'output', 'simulacion', 'series_temporales', 'descarga_fluvial_500', 'descarga_fluvial_guadalete_sim_' + str(cont).zfill(4) + '.txt'), sep=n(b'\t')) cont += 1 data = river_discharge['Q'] ecdf = empirical_distributions.ecdf_histogram(data) epdf = empirical_distributions.epdf_histogram(data, bins=0) axes1[0].plot(epdf.index, epdf, 'r', lw=2) axes1[1].plot(ecdf.index, ecdf, 'r', lw=2) fig_name = 'pdf_cdf_descarga_fluvial.png' fig2.savefig( os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial', fig_name))
# coding: utf8
import numpy as np
from scipy.stats import norm

from bokeh.io import push_notebook, show, output_notebook, curdoc
from bokeh.layouts import row, column, widgetbox, layout, gridplot
from bokeh.plotting import figure, output_file, ColumnDataSource
from bokeh.models import CustomJS, Select, Slider, TextInput, Spinner
from bokeh.models.glyphs import MultiLine
from bokeh.models.widgets import Div
from bokeh import palettes

size = 20
X = norm.rvs(size=(size, 2), random_state=42) * 2
X = np.dot(X, np.linalg.cholesky([[1, .8], [.8, .8]]))
x = X[:, 0]
y = X[:, 1]

# sort by x so the lines drawn later run left to right
index = np.argsort(x)
x = np.sort(x)
y = y[index]

pred = np.nan * np.zeros(len(X))
error = np.nan * np.zeros(len(X))
# endpoints of the residual segments (one pair of points per observation)
error_0s = [np.array(np.nan * np.zeros(2)) for i in range(0, len(X))]
page_source = page_response.content print('{}/{}'.format(num_pages - page_iter + 1, num_pages), page_response, page_link) if (page_response.status_code != 200) or (page_source is None): print('Connection aborted. Pause for {} seconds'.format(BREAK_TIME)) time.sleep(BREAK_TIME) page_link = QUERY.format(page_iter) page_response = requests.get( page_link, headers={'User-Agent': UserAgent().chrome}) access_time.append(datetime.now()) page_source = page_response.content page_soup = BeautifulSoup(page_source, "lxml") page_descriptions_soup = page_soup.find_all('div', {'class': "description"}) query_soup.append(page_descriptions_soup) time.sleep(expon.rvs(28, 9) + norm.rvs(5, 7)) page_iter = page_iter + 1 print('len(access time): ', len(access_time)) print('len(query soup): ', len(query_soup)) flats = [] for page_iter, page in enumerate(query_soup): for description in page: flat_soup = {} for key in TAGS: for param in TAGS[key]: flat_soup[param] = description.find(TAGS[key][param][0], TAGS[key][param][1]) flat_params = {} for param in TAGS['text_tags']: if flat_soup[param] is not None:
def __call__(self):
    self.clouds = []
    np.random.seed(11 + self.random_seed)
    a = np.random.uniform(low=0., high=1., size=self.n)
    self.phi = 2. * np.pi * a
    if self.model == 'Spherical':
        self.r = norm.rvs(loc=self.R_params[0], scale=self.R_params[1],
                          size=self.n)
        v = np.random.uniform(low=0., high=1., size=self.n)
        self.theta = np.arccos(2. * v - 1.)
        coord_array = coord.PhysicsSphericalRepresentation(
            self.phi * u.rad, self.theta * u.rad, self.r * u.kpc)
        self.cartesian_galactocentric = self.cartesianize_coordinates(
            coord_array)
        self.heliocentric_coordinates()
        for i, x, p, t, d, latit, longit in zip(np.arange(self.n), self.r,
                                                self.phi, self.theta,
                                                self.d_sun, self.lat,
                                                self.long):
            if x <= 0.:
                self.r[i] = np.random.uniform(low=0., high=1., size=1)
                x = self.r[i]
            c = Cloud(i, x, p, t, size=None, em=None)
            c.assign_sun_coord(d, latit, longit)
            self.clouds.append(c)
    else:
        self.r = self.phi * 0.
        rbar = self.R_params[2]
        if self.model == 'Axisymmetric':
            np.random.seed(self.random_seed + 29)
            self.r = norm.rvs(loc=self.R_params[0], scale=self.R_params[1],
                              size=self.n)
            negs = np.ma.masked_less(self.r, 0.)
            # central molecular zone
            self.r[negs.mask] = 0.
        elif self.model == 'LogSpiral':
            # The bar is assumed axisymmetric, with an inclination angle
            # phi_0 ~ 25 deg as measured by Fux et al. (1999).
            phi_0 = np.deg2rad(25.)
            self.phi += phi_0
            subsize = int(self.n / 10)
            self.r[0:subsize] = norm.rvs(loc=self.R_params[0],
                                         scale=self.R_params[1],
                                         size=subsize)
            # np.random.uniform(low=0., high=8., size=self.n / 4)
            rscale = rbar / 1.5
            self.r[subsize:self.n], self.phi[subsize:self.n] = \
                log_spiral_radial_distribution2(rbar, phi_0, self.n - subsize,
                                                self.R_params[0],
                                                self.R_params[1])
            # self.r[subsize:self.n] = log_spiral_radial_distribution(
            #     self.phi[subsize:self.n], rbar, phi_0)
            # simulate the bar
            arr = np.ma.masked_less(self.r, rbar)
            self.r[arr.mask] = abs(
                np.random.normal(loc=0., scale=rscale,
                                 size=len(self.r[arr.mask])))
            negs = np.ma.masked_less(self.r, 0.)
            # central molecular zone
            self.r[negs.mask] = 0.
        # The thickness of the Galactic plane is a function of galactocentric
        # radius, roughly ~100 pc * cosh(R / R0) with R0 ~ 10 kpc;
        # for reference see fig. 6 of Heyer and Dame (2015).
        sigma_z0 = self.z_distr[0]
        R_z0 = self.z_distr[1]
        sigma_z = lambda R: sigma_z0 * np.cosh(R / R_z0)
        self.zeta = self.phi * 0.
        np.random.seed(self.random_seed + 19)
        for i, x, p in zip(np.arange(self.n), self.r, self.phi):
            self.zeta[i] = np.random.normal(loc=0., scale=sigma_z(x))
            self.clouds.append(
                Cloud(i, x, p, self.zeta[i], size=None, em=None))
        coord_array = coord.CylindricalRepresentation(
            self.r * u.kpc, self.phi * u.rad, self.zeta * u.kpc)
        self.cartesian_galactocentric = self.cartesianize_coordinates(
            coord_array)
        self.heliocentric_coordinates()
        for c, d, latit, longit in zip(self.clouds, self.d_sun, self.lat,
                                       self.long):
            c.assign_sun_coord(d, latit, longit)
    self.L = np.array(self.sizes)
    self.healpix_vecs = self.compute_healpix_vec()
    self.W = self.get_pop_emissivities_sizes()[0]
# plot observed data
x_plot = np.linspace(136, 180, len(d2))[:, np.newaxis]
kde = KernelDensity(kernel='gaussian', bandwidth=2)
kde.fit(d2)
y = np.exp(kde.score_samples(x_plot))
plt.plot(x_plot, y)
plt.show()

pm.kdeplot(d2)
plt.xlabel('height')
plt.ylabel('density')
plt.title('Prior')
plt.show()

# code chunk 4.13 (set up prior)
sample_mu = norm.rvs(loc=178, scale=20, size=1000)
sample_sigma = uniform.rvs(0, 50, 1000)
prior_h = norm.rvs(sample_mu, sample_sigma, 1000)
sns.set_theme(style='darkgrid')
ax = sns.kdeplot(prior_h, bw=2)
ax.set(xlabel='height', title='Prior')
plt.show()

# code chunk 4.14 (grid estimation)
mu_grid = np.linspace(140, 160, 200)
sigma_grid = np.linspace(4, 9, 200)
# sum(norm.logpdf(...)) is a scalar for each (mu, sigma) pair, so collect the
# grid of log-likelihoods into an array rather than concatenating scalars
post_list = [sum(norm.logpdf(d2, m, s)) for m in mu_grid for s in sigma_grid]
post_ll = np.array(post_list)
mu_grid_rep = np.repeat(mu_grid, 200)
sigma_grid_rep = np.tile(sigma_grid, 200)
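# Hedged continuation sketch (not part of the original chunk): convert the grid of
# log-likelihoods to normalized probabilities and sample (mu, sigma) pairs from the
# grid, assuming a flat prior. The names post_prob, sample_idx, sample_mu_post and
# sample_sigma_post are illustrative.
post_prob = np.exp(post_ll - post_ll.max())   # subtract the max for numerical stability
post_prob = post_prob / post_prob.sum()       # normalize to a probability vector
sample_idx = np.random.choice(len(post_prob), size=10_000, replace=True, p=post_prob)
sample_mu_post = mu_grid_rep[sample_idx]
sample_sigma_post = sigma_grid_rep[sample_idx]
plt.scatter(sample_mu_post, sample_sigma_post, s=2, alpha=0.1)
plt.xlabel('mu')
plt.ylabel('sigma')
plt.show()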
def white_noise(n, nb_sensor): noise = [] for i in range(n): noise += [norm.rvs(size=nb_sensor, loc=mean, scale=standard_deviation)] return noise
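# Minimal usage sketch: white_noise reads module-level `mean` and `standard_deviation`,
# which are not defined in the snippet above; the values here are placeholders.
mean = 0.0
standard_deviation = 1.0
samples = white_noise(n=100, nb_sensor=8)
print(len(samples), samples[0].shape)  # -> 100 draws, each an array of shape (8,)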
'G3vOTHER': [-1 / 8, -1 / 8, 1, -1 / 8, -1 / 8, -1 / 8, -1 / 8, -1 / 8, -1 / 8] } if dataSource == "Random": np.random.seed(47405) ysdtrue = 4.0 a0true = 100 atrue = [2, -2] # sum to zero npercell = 8 x = [] y = [] for xidx in range(len(atrue)): for subjidx in range(npercell): x.append(xidx) y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue)) Ntotal = len(y) NxLvl = len(set(x)) # # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD: contrast_dict = None for g1idx in range(NxLvl): for g2idx in range(g1idx + 1, NxLvl): cmpVec = np.repeat(0, NxLvl) cmpVec[g1idx] = -1 cmpVec[g2idx] = 1 contrast_dict = (contrast_dict, cmpVec) z = (y - np.mean(y)) / np.std(y) ## THE MODEL. with pm.Model() as model:
def sample(self, N): return self.m + np.sqrt(self.sigma2) * norm.rvs(size=N)
def particle_dynamics\ (BoxDim, nPart, nTime, dyN, speed, dt,\ dir_pos_data, dir_posCon_data, dir_vel_data, dir_dynamics,\ start_time): ################################################################################################### #### generate initial particle positions xPos0,yPos0,zPos0,phi0,theta0 = \ initialize_particle_pos(BoxDim, nPart, dir_pos_data,dir_posCon_data,dir_vel_data) xPos = xPos0 yPos = yPos0 zPos = zPos0 phi = phi0 theta = theta0 # ##### continous positions for MSD calculations # xPosCon = xPos0 # yPosCon = yPos0 # zPosCon = zPos0 ################################################################################################### ### allocate for dynamics output m_speed = np.zeros(nTime) ### create gaussian distribution for velocity distribution of particles (ensemble NOT time) v_distr = norm.rvs(size=1*nPart, scale=1.) ## std = 1, mean = 0 ##################################################################### #### loop over timesteps to move the particles # particle positions are updated every timestep for nt in (np.arange(nTime)+1): # can not be parallized in a simple way #### time processed if nt % 500 == 0: print ('working on timestep ' + str(nt) + ' out of ' + str(nTime+1)) elapsed_time = time.time() - start_time el_min, el_sec = divmod(elapsed_time, 60) el_hrs, el_min = divmod(el_min, 60) print ('elapsed time: %d:%02d:%02d' % (el_hrs,el_min,el_sec)) #### particle displacement xPos, yPos, zPos, phi, theta, m_speed_nt = particle_displacement\ (BoxDim, nPart, speed, dyN, \ xPos, yPos, zPos, phi, theta, dt, nt, v_distr, \ dir_pos_data, dir_posCon_data, dir_vel_data) m_speed[nt-1] = m_speed_nt ########################################################## ## end loop over time steps ############################################################################################## ### save mean speed to file file_dy_out = (dir_dynamics + 'dynamics_' + str('{:0>8d}'.format(nPart)) + '_Ntimestep' + str('{:0>4d}'.format(nTime)) + '.dat') np.savetxt(file_dy_out, np.transpose((np.arange(nTime),m_speed, np.ones(nTime)*speed)), \ fmt='%e', delimiter=' ', newline='\n')
def run(popsize, max_years, mutation_probability):
    '''
    The arguments to this function are what they sound like.

    Runs genetic_algorithm on various knapsack problem instances
    and keeps track of tabular information with this schema:
    DIFFICULTY YEAR HIGH_SCORE AVERAGE_SCORE BEST_PLAN
    '''
    # Problem instances: (capacity, weights, values)
    problems = {
        'sanity_check': (10, [10, 5, 8], [100, 50, 80]),
        'easy': (20, [20, 5, 15, 8, 13], [10, 4, 11, 2, 9]),
        'medium': (100,
                   [13, 19, 34, 1, 20, 4, 8, 24, 7, 18, 1, 31, 10, 23, 9, 27,
                    50, 6, 36, 9, 15],
                   [26, 7, 34, 8, 29, 3, 11, 33, 7, 23, 8, 25, 13, 5, 16, 35,
                    50, 9, 30, 13, 14]),
        'hard': (5000, norm.rvs(50, 15, size=100), norm.rvs(200, 60, size=100)),
    }

    rows = []
    for difficulty, problem in problems.items():
        chromosomes = genetic_algorithm(problem, popsize, max_years,
                                        mutation_probability)
        for year, (year_chromosomes, fitnesses) in enumerate(chromosomes):
            rows.append({
                'DIFFICULTY': difficulty,
                'YEAR': year,
                'HIGH_SCORE': max(fitnesses),
                'AVERAGE_SCORE': np.mean(fitnesses),
                'BEST_PLAN': year_chromosomes[np.argmax(fitnesses)]
            })
    table = pd.DataFrame(rows, columns=[
        "DIFFICULTY", "YEAR", "HIGH_SCORE", "AVERAGE_SCORE", "BEST_PLAN"
    ])

    for difficulty_group in ['sanity_check', 'easy', 'medium', 'hard']:
        group = table[table['DIFFICULTY'] == difficulty_group]
        bestrow = group.loc[group['HIGH_SCORE'].idxmax()]
        print(
            "Best year for difficulty {} is {} with high score {} and chromosome {}"
            .format(difficulty_group, int(bestrow['YEAR']),
                    bestrow['HIGH_SCORE'], bestrow['BEST_PLAN']))

    # Save the performance data in case you want to refer to it later;
    # pickled python objects can be loaded back at any later point.
    table.to_pickle("results.pkl")
          0.1])  # with probability weights in 100 iterations
simulate(1000)
simulate(1000, [0.2, 0.3, 0.2, 0.1, 0.1, 0.1])

"""2nd question: generating random data for multi-linear regression"""
import numpy as np
import pandas as pd
import random
from scipy.stats import norm

random.seed(1)
np.random.seed(1)  # norm.rvs draws from NumPy's global RNG, so seed it for reproducibility

# Y = b0 + b1*x1 + b2*x2 is the chosen equation
X = []
for i in range(2):
    X_i = norm.rvs(0, 1, 100)
    X.append(X_i)
eps = norm.rvs(0, 0.25, 100)
y = 1 + (0.4 * X[0]) + (0.5 * X[1]) + eps
data_mlr = {'X0': X[0], 'X1': X[1]}
df = pd.DataFrame(data_mlr)
print(df)

"""Data for logistic regression"""
n_features = 4
X = []
for i in range(n_features):
    X_i = norm.rvs(0, 1, 100)
    X.append(X_i)
a1 = (np.exp(1 + (0.5 * X[0]) + (0.4 * X[1]) + (0.3 * X[2]) + (0.5 * X[3])) /
def sqrt_normal_rvs(mu, sigma=1, random_state=None): return norm.rvs(loc=mu**0.5, scale=sigma, random_state=random_state)**2
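# Minimal usage sketch (values are illustrative): squaring a Normal(sqrt(mu), sigma)
# draw yields a positive variate with mean mu + sigma**2. The loc argument broadcasts,
# so passing an array of means returns an array of draws.
import numpy as np
draws = sqrt_normal_rvs(np.full(10_000, 4.0), sigma=0.1, random_state=0)
print(draws.mean())  # close to 4.0 + 0.1**2 = 4.01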
plot_x = arange_inc(1, 9, 0.05) plot_f = f(plot_x) # Create a proposal distribution by hand by looking at the chart # Experiment to get M as small as possible mu1 = 5.8 sigma1: float = np.std(plot_x)*0.9 # Proposal distribution g(x) (NOT majorized) g1: funcType = lambda x : norm.pdf(x, loc=mu1, scale=sigma1) plot_g1 = g1(plot_x) M1: float = np.max(plot_f / plot_g1)*1.01 plot_g1_maj = M1 * plot_g1 print(f'Proposal Distribution and Majorizer for rejection sampling.') print(f'mu={mu1:0.6f}, sigma={sigma1:0.6f}, M={M1:0.6f}') # Define the sampling distribution for the chosen proposal distribution g(x) g1_sample = lambda : norm.rvs(loc=mu1, scale=sigma1) # Plot the PDF f_X(x) and the majorizing distribution Mg(x) fig, ax = plt.subplots() fig.set_size_inches([16, 8]) ax.set_title('PDF $f_X(x)$ and its Majorizer $Mg(x)$') ax.set_xlabel('x') ax.set_ylabel('$f_X(x)$') ax.set_xlim([1,9]) ax.plot(plot_x, plot_f, label='PDF') ax.plot(plot_x, plot_g1_maj, label='Mg(x)') ax.legend() ax.grid() plt.show()
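# Hedged follow-up sketch: the accept/reject loop that the setup above builds toward.
# It assumes f, g1, g1_sample and M1 from the preceding block; rejection_sample and
# n_samples are illustrative names, not part of the original code.
import numpy as np

def rejection_sample(n_samples: int):
    samples = []
    while len(samples) < n_samples:
        x = g1_sample()              # draw a candidate from the proposal g(x)
        u = np.random.uniform()      # uniform on [0, 1)
        if u * M1 * g1(x) <= f(x):   # accept with probability f(x) / (M1 * g1(x))
            samples.append(x)
    return np.array(samples)

x_acc = rejection_sample(10_000)
print(f'Acceptance rate is roughly 1/M = {1.0 / M1:0.3f}')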
sigma = z_sigma * y_sd # Posterior prediction: # Specify x values for which predicted y's are needed: x_post_pred = np.arange(55, 81) # Define matrix for recording posterior predicted y values at each x value. # One row per x value, with each row holding random predicted y values. post_samp_size = len(b1) y_post_pred = np.zeros((len(x_post_pred), post_samp_size)) # Define matrix for recording HDI limits of posterior predicted y values: y_HDI_lim = np.zeros((len(x_post_pred), 2)) # Generate posterior predicted y values. # This gets only one y value, at each x, for each step in the chain. for chain_idx in range(post_samp_size): y_post_pred[:, chain_idx] = norm.rvs( loc=b0[chain_idx] + b1[chain_idx] * x_post_pred, scale=np.repeat([sigma[chain_idx]], [len(x_post_pred)]), size=len(x_post_pred)) for x_idx in range(len(x_post_pred)): y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx]) ## Display believable beta0 and b1 values plt.figure() plt.subplot(1, 2, 1) thin_idx = 50 plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7) plt.ylabel('Standardized Intercept') plt.xlabel('Standardized Slope') plt.subplot(1, 2, 2) plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7) plt.ylabel('Intercept (ht when wt=0)')
def bm_change(self, dt, delta):
    # Brownian increment over dt with diffusion coefficient delta:
    # norm.rvs takes the standard deviation as `scale`, i.e. delta * sqrt(dt)
    change = norm.rvs(loc=0, size=1, scale=delta * dt ** 0.5)
    return change
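# Minimal usage sketch: the enclosing class is not shown, so a tiny stand-in class with
# the same method is used here purely for illustration; increments are accumulated into
# a single Brownian path.
import numpy as np
from scipy.stats import norm

class _BM:
    def bm_change(self, dt, delta):
        # one Brownian increment over dt with diffusion coefficient delta
        return norm.rvs(loc=0, size=1, scale=delta * dt ** 0.5)

dt, delta, n_steps = 0.01, 0.5, 1000
bm = _BM()
path = np.cumsum([bm.bm_change(dt, delta)[0] for _ in range(n_steps)])
print(path[-1])  # endpoint of one simulated path; its variance is about delta**2 * n_steps * dt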
def cgv(mu, nu, minv=0., maxv=1.):
    "Continuous Gaussian variate, clipped to the interval [minv, maxv]."
    rv = norm.rvs(mu, nu)
    return min(max(rv, minv), maxv)
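# Minimal usage sketch (numbers are illustrative): cgv clips draws to [minv, maxv],
# so out-of-range values pile up exactly at the bounds rather than being redrawn.
draws = [cgv(0.9, 0.3) for _ in range(10_000)]
print(min(draws), max(draws))        # never outside [0.0, 1.0]
print(sum(d == 1.0 for d in draws))  # count of draws clipped onto the upper bound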
def _impl(y): mu, std = gp(y) return norm.rvs(mu, std)
dyn_e_T, dyn_tau_T,\ T, t_step) #%% plt.figure() plt.plot(control_coef_MKV['eta']) plt.title('eta') plt.figure() plt.plot(control_coef_MKV['chi']) plt.title('chi') # %% simulate a trajectory of X_t np.random.seed(seed=0) dW_t = norm.rvs(loc=0, scale=sigma * np.sqrt(t_step), size=n_step) X_t = np.zeros(n_step, dtype=float) X_t[0] = X0 for i in range(1, n_step, 1): X_t[i] = X_t[i-1] + ((dyn_a_t + dyn_b_t *control_coef_MKV['eta'][i-1]) * X_t[i-1] \ + dyn_b_t * control_coef_MKV['chi'][i-1] + dyn_c_t[i-1]) * t_step \ + dW_t[i-1] alpha_t = opt_control(X_t, control_coef_MKV['eta'], control_coef_MKV['chi'], -b2 / rt) # simulated running cost f_t = .5 * (qt * X_t**2 + bar_qt * (X_t - st * bar_mu_t_MKV)**2 + rt * alpha_t**2) # terminal cost
import matplotlib.pyplot as plt import numpy as np from scipy.stats import norm theme_blue = '#0C2B36' theme_red = '#E04D4F' theme_green = '#00F900' mu = 5 sig = 1 # Generate some data for this demonstration. data = norm.rvs(10.0, 2.5, size=500) # Fit a normal distribution to the data: mu, std = norm.fit(data) # Plot the histogram. plt.hist(data, bins=9, density=True, alpha=0.8, color=theme_blue, edgecolor='gray') # Plot the PDF. xmin, xmax = plt.xlim() x = np.linspace(xmin, xmax, 100) p = norm.pdf(x, mu, std) plt.plot(x, p, theme_red, linewidth=4)
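# Possible follow-up (illustrative, not part of the original snippet): annotate the
# figure with the fitted parameters from norm.fit and render it.
plt.title(f'Fit results: mu = {mu:.2f}, std = {std:.2f}')
plt.show()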