def get_on_off_times(t_begin, t_end, packet_num, c_pareto, sigma):
    """Simulate alternating off/on phases between ``t_begin`` and ``t_end``.

    Every cycle consists of an off phase (log-normally distributed duration)
    followed by an on phase (generalized-Pareto distributed duration).  Both
    distributions are scaled by the mean interval
    ``(t_end - t_begin) / packet_num``.

    Args:
        t_begin: Start time of the simulated window.
        t_end: End time of the simulated window.
        packet_num: Number of packets; used to derive the mean interval and
            as the batch size for the random draws.
        c_pareto: Shape parameter passed to ``genpareto.rvs``.
        sigma: ``sigma`` parameter passed to ``numpy.random.lognormal``.

    Returns:
        A list of ``[on_start, off_start]`` pairs, one per on phase that ends
        no later than ``t_end``.
    """
    mean_interval_time = (t_end - t_begin) / packet_num

    def draw_batch():
        # MATLAB reference: gprnd(c_pareto, mean_interval_time, 0, 1, packet_num)
        on_batch = genpareto.rvs(c_pareto,
                                 scale=mean_interval_time,
                                 size=packet_num)
        # MATLAB reference: lognrnd(log(mean_interval_time), sigma, 1, packet_num)
        # MATLAB's log() is the natural logarithm, which is also what
        # numpy.random.lognormal expects for the underlying normal's mean.
        off_batch = numpy.random.lognormal(mean=math.log(mean_interval_time),
                                           sigma=sigma,
                                           size=packet_num)
        return on_batch, off_batch

    on_times_abs, off_times_abs = draw_batch()

    on_periods = []
    counter = 0  # 0-based: the MATLAB original started at 1
    current_time = t_begin
    while current_time <= t_end:
        if counter >= packet_num:
            # All pre-drawn durations are used up but the window is not yet
            # covered: draw a fresh batch instead of indexing out of range.
            on_times_abs, off_times_abs = draw_batch()
            counter = 0

        # off phase
        current_time = current_time + off_times_abs[counter]
        new_on_start = current_time

        # on phase
        current_time = current_time + on_times_abs[counter]
        new_off_start = current_time

        # Only keep on phases that finish inside the window.
        if current_time <= t_end:
            on_periods.append([new_on_start, new_off_start])
        counter = counter + 1
    return on_periods
def _simulate_nd(self, n):
    """Draw ``n`` samples from a two-part model split at the threshold ``self.u``.

    Each sample is, with probability ``self.p``, resampled uniformly from the
    stored observations ``self.nd_vals`` that are at or below ``self.u``.
    Otherwise it is drawn from a generalized Pareto distribution located at
    ``self.u`` whose shape and scale are taken from a randomly chosen entry of
    ``self.posterior_xi`` / ``self.posterior_sigma``.

    Args:
        n: Number of samples to generate.

    Returns:
        A 1-D numpy array of length ``n``.
    """
    # NOTE(review): assumes self.nd_vals, self.posterior_xi and
    # self.posterior_sigma are numpy arrays (they are indexed with masks and
    # index arrays) - confirm against the class constructor.
    samples = np.empty((n, ))

    # 1 -> sample comes from the observations below the threshold,
    # 0 -> sample comes from the generalized Pareto tail.
    is_below = np.random.binomial(1, self.p, n)
    n_samples_below = np.sum(is_below)

    # Compare against the threshold attribute.  The previous code reused the
    # name ``u`` for the indicator array and therefore compared the
    # observations with the 0/1 draws instead of the threshold.
    below_obs = self.nd_vals[self.nd_vals <= self.u]
    row_idx = np.random.choice(len(below_obs), size=n_samples_below)
    samples[is_below == 1] = below_obs[row_idx]

    n_samples_tail = n - n_samples_below
    # One posterior draw index per tail sample, shared by xi and sigma so the
    # pair stays consistent.
    posterior_idx = np.random.choice(self.n_posterior, n_samples_tail)
    posterior_xi_sample = self.posterior_xi[posterior_idx]
    posterior_sigma_sample = self.posterior_sigma[posterior_idx]
    samples[is_below == 0] = gp.rvs(c=posterior_xi_sample,
                                    loc=self.u,
                                    scale=posterior_sigma_sample,
                                    size=n_samples_tail)
    return samples
def __init__(self,
             initial_wealth=25.0,
             edge_prior_alpha=7,
             edge_prior_beta=3,
             max_wealth_alpha=5.0,
             max_wealth_m=200.0,
             max_rounds_mean=300.0,
             max_rounds_sd=25.0,
             reseed=True):
    """Set up one game: keep the hyper-parameters, then draw the game's parameters.

    Args:
        initial_wealth: Starting wealth; stored as a float.
        edge_prior_alpha: First argument of the beta draw for the edge.
        edge_prior_beta: Second argument of the beta draw for the edge.
        max_wealth_alpha: First positional argument of ``genpareto.rvs`` for
            the wealth cap.
        max_wealth_m: Second positional argument of ``genpareto.rvs`` for the
            wealth cap.
        max_rounds_mean: Mean of the normal draw for the number of rounds.
        max_rounds_sd: Standard deviation of that normal draw.
        reseed: When true (or when ``np_random`` does not exist yet),
            ``self.seed()`` is called before anything is drawn.
    """
    # The hyper-parameters are kept on the instance so they can be passed
    # back into __init__() on reset and govern the next game as well.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initial_wealth = float(initial_wealth)
    self.edge_prior_alpha, self.edge_prior_beta = (edge_prior_alpha,
                                                   edge_prior_beta)
    self.max_wealth_alpha, self.max_wealth_m = (max_wealth_alpha,
                                                max_wealth_m)
    self.max_rounds_mean, self.max_rounds_sd = (max_rounds_mean,
                                                max_rounds_sd)

    must_seed = reseed or not hasattr(self, 'np_random')
    if must_seed:
        self.seed()

    # Parameters of this particular game (draw order: edge, wealth cap, rounds).
    rng = self.np_random
    drawn_edge = rng.beta(edge_prior_alpha, edge_prior_beta)
    wealth_cap = round(
        genpareto.rvs(max_wealth_alpha, max_wealth_m, random_state=rng))
    round_cap = int(round(rng.normal(max_rounds_mean, max_rounds_sd)))

    # Running statistics exposed through the observation:
    # - highest wealth seen so far (sufficient statistic for the Pareto
    #   wealth cap: alpha does not update, x_m does),
    # - wins / losses for the coin-flip edge,
    # - number of rounds already played.
    self.max_ever_wealth = float(self.initial_wealth)
    self.wins = 0
    self.losses = 0
    self.rounds_elapsed = 0

    # Spaces, as in the non-generalized environment.
    self.action_space = spaces.Discrete(int(wealth_cap * 100))
    current_wealth_space = spaces.Box(0, wealth_cap, shape=[1],
                                      dtype=np.float32)
    rounds_elapsed_space = spaces.Discrete(round_cap + 1)
    wins_space = spaces.Discrete(round_cap + 1)
    losses_space = spaces.Discrete(round_cap + 1)
    max_observed_wealth_space = spaces.Box(0, wealth_cap, [1],
                                           dtype=np.float32)
    self.observation_space = spaces.Tuple((
        current_wealth_space,
        rounds_elapsed_space,
        wins_space,
        losses_space,
        max_observed_wealth_space,
    ))
    self.reward_range = (0, wealth_cap)

    self.edge = drawn_edge
    self.wealth = self.initial_wealth
    self.max_rounds = round_cap
    self.rounds = self.max_rounds
    self.max_wealth = wealth_cap
def test_fit_generalized_pareto(k, sigma, n_samples=5000):
    """Fit a generalized Pareto to synthetic data and compare with the truth.

    Samples are drawn with shape ``k`` and scale ``sigma``; the estimates
    returned by ``fit_generalized_pareto`` must match both within 0.02.
    RuntimeWarnings raised while importing scipy, sampling or fitting are
    suppressed.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        from scipy.stats import genpareto

        draws = genpareto.rvs(c=k, scale=sigma, size=n_samples)
        estimates = fit_generalized_pareto(torch.tensor(draws))
        fit_k, fit_sigma = estimates

    assert_equal(k, fit_k, prec=0.02)
    assert_equal(sigma, fit_sigma, prec=0.02)
def _generate_path(self) -> np.ndarray:
    """Simulate a random walk that is floored at zero.

    The walk starts at 0 and takes ``self._step_n`` steps.  Step sizes are
    generalized-Pareto draws (shape 0.3, scale 1) shifted by ``-27/14``; after
    each step the position is clipped from below at 0.

    Returns:
        Array with ``self._step_n + 1`` positions, the starting point included.
    """
    increments = genpareto.rvs(0.3,
                               loc=-np.divide(27, 14),
                               scale=1,
                               size=self._step_n)

    start = 0
    trajectory = [start]
    current = start
    for increment in increments:
        # Never let the walk drop below the origin.
        current = max(current + increment, 0)
        trajectory.append(current)
    return np.array(trajectory)
def sample(self, sampleShape):
    """
    Draw random values from the distribution.

    The draw uses ``self.shapeParam`` as shape, ``self.scaleParam`` as scale
    and a location of 0.

    :param sampleShape: shape of the sample
    :return: numpy array of shape 'sampleShape' holding the sampled data
    """
    draw_kwargs = {
        "c": self.shapeParam,
        "loc": 0,
        "scale": self.scaleParam,
        "size": sampleShape,
    }
    return genpareto.rvs(**draw_kwargs)
def __init__(self,
             initialWealth=25,
             edgePriorAlpha=7,
             edgePriorBeta=3,
             maxWealthAlpha=5,
             maxWealthM=200,
             maxRoundsMean=300,
             maxRoundsSD=25,
             reseed=True):
    """Set up one game: keep the hyper-parameters, then draw the game's parameters.

    Args:
        initialWealth: Starting wealth.
        edgePriorAlpha: First argument of the beta draw for the edge.
        edgePriorBeta: Second argument of the beta draw for the edge.
        maxWealthAlpha: First positional argument of ``genpareto.rvs`` for the
            wealth cap.
        maxWealthM: Second positional argument of ``genpareto.rvs`` for the
            wealth cap.
        maxRoundsMean: Mean of the normal draw for the number of rounds.
        maxRoundsSD: Standard deviation of that normal draw.
        reseed: When true (or when ``np_random`` does not exist yet),
            ``self._seed()`` is called at the end of construction.
    """
    # Store the hyperparameters for passing back into __init__() during
    # resets so the same hyperparameters govern the next game's parameters,
    # as the user expects.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initialWealth = initialWealth
    self.edgePriorAlpha = edgePriorAlpha
    self.edgePriorBeta = edgePriorBeta
    self.maxWealthAlpha = maxWealthAlpha
    self.maxWealthM = maxWealthM
    self.maxRoundsMean = maxRoundsMean
    self.maxRoundsSD = maxRoundsSD

    # Draw this game's set of parameters.
    edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
    maxWealth = round(
        genpareto.rvs(maxWealthAlpha,
                      maxWealthM,
                      random_state=prng.np_random))
    # round() may hand back a float; the Discrete spaces below need an
    # integer size, so cast explicitly.
    maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

    # Sufficient statistic for the Pareto distribution on the wealth cap:
    # alpha doesn't update, but x_m does, and simply is the highest wealth
    # count we've seen to date.
    self.maxEverWealth = initialWealth
    # For the coinflip edge, it is total wins/losses.
    self.wins = 0
    self.losses = 0
    # For the number of rounds, remember how many rounds have been played.
    self.roundsElapsed = 0

    # The rest proceeds as before.
    self.action_space = spaces.Discrete(int(maxWealth * 100))
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, shape=[1]),  # current wealth
        spaces.Discrete(maxRounds + 1),  # rounds elapsed
        spaces.Discrete(maxRounds + 1),  # wins
        spaces.Discrete(maxRounds + 1),  # losses
        spaces.Box(0, maxWealth, [1])))  # maximum observed wealth
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = initialWealth
    self.maxRounds = maxRounds
    self.rounds = self.maxRounds
    self.maxWealth = maxWealth
    if reseed or not hasattr(self, 'np_random'):
        self._seed()
def __init__(self,
             initial_wealth=25.0,
             edge_prior_alpha=7,
             edge_prior_beta=3,
             max_wealth_alpha=5.0,
             max_wealth_m=200.0,
             max_rounds_mean=300.0,
             max_rounds_sd=25.0,
             reseed=True):
    """Create a game whose edge, wealth cap and round count are drawn at random.

    The hyper-parameters are stored on the instance unchanged (only
    ``initial_wealth`` is converted to float).  The random generator is
    seeded first when ``reseed`` is true or no ``np_random`` attribute exists
    yet.  Then the edge (beta draw), the wealth cap (``genpareto.rvs`` draw,
    rounded) and the number of rounds (normal draw, rounded to int) are
    sampled in that order.
    """
    # Keep the hyper-parameters so a reset can hand them back to __init__()
    # and the next game is governed by the same ones.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initial_wealth = float(initial_wealth)
    self.edge_prior_alpha = edge_prior_alpha
    self.edge_prior_beta = edge_prior_beta
    self.max_wealth_alpha = max_wealth_alpha
    self.max_wealth_m = max_wealth_m
    self.max_rounds_mean = max_rounds_mean
    self.max_rounds_sd = max_rounds_sd

    if reseed:
        self.seed()
    elif not hasattr(self, 'np_random'):
        self.seed()

    # This game's parameters.
    sampled_edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
    raw_wealth_cap = genpareto.rvs(max_wealth_alpha,
                                   max_wealth_m,
                                   random_state=self.np_random)
    wealth_limit = round(raw_wealth_cap)
    raw_round_count = self.np_random.normal(max_rounds_mean, max_rounds_sd)
    round_limit = int(round(raw_round_count))

    # Highest wealth seen so far: the sufficient statistic for the Pareto
    # distribution on the wealth cap (alpha doesn't update, x_m does).
    self.max_ever_wealth = float(self.initial_wealth)
    # Win/loss totals for the coin-flip edge and the count of rounds played.
    self.wins = 0
    self.losses = 0
    self.rounds_elapsed = 0

    # Action and observation spaces.
    n_actions = int(wealth_limit * 100)
    n_round_values = round_limit + 1
    self.action_space = spaces.Discrete(n_actions)
    self.observation_space = spaces.Tuple((
        spaces.Box(0, wealth_limit, shape=[1],
                   dtype=np.float32),  # current wealth
        spaces.Discrete(n_round_values),  # rounds elapsed
        spaces.Discrete(n_round_values),  # wins
        spaces.Discrete(n_round_values),  # losses
        spaces.Box(0, wealth_limit, [1],
                   dtype=np.float32)))  # maximum observed wealth
    self.reward_range = (0, wealth_limit)

    self.edge = sampled_edge
    self.wealth = self.initial_wealth
    self.max_rounds = round_limit
    self.rounds = self.max_rounds
    self.max_wealth = wealth_limit
def generate_dataset(seed: int, dataset: str, version: str,
                     params: Dict[str, Any], overwrite: bool) -> None:
    """Generate a synthetic two-column table and write it as CSV and pickle.

    The first column contains every domain value once, followed by
    generalized-Pareto draws rescaled to the domain.  Each value of the
    second column copies the first column with probability ``corr`` and is
    otherwise drawn from ``np.random.choice(dom)``.

    Args:
        seed: Seed for both ``random`` and ``np.random``.
        dataset: Sub-directory of ``DATA_ROOT`` that receives the files.
        version: File stem used for the ``.csv`` and ``.pkl`` files.
        params: Must contain ``row_num``, ``col_num``, ``dom``, ``corr`` and
            ``skew``.
        overwrite: When false and the CSV already exists, nothing is done.
    """
    path = DATA_ROOT / dataset
    # parents=True so a missing DATA_ROOT does not abort the run.
    path.mkdir(parents=True, exist_ok=True)
    csv_path = path / f"{version}.csv"
    pkl_path = path / f"{version}.pkl"
    if not overwrite and csv_path.is_file():
        L.info("Dataset path exists, do not continue")
        return

    row_num = params['row_num']
    col_num = params['col_num']
    dom = params['dom']
    corr = params['corr']
    skew = params['skew']

    if col_num != 2:
        L.info("For now only support col=2!")
        exit(0)

    L.info(
        f"Start generate dataset with {col_num} columns and {row_num} rows using seed {seed}"
    )
    random.seed(seed)
    np.random.seed(seed)

    # Generate the first column according to skew.
    # Make sure every domain value has at least 1 value.
    col0 = np.arange(dom)
    # c = skew - 1, so we can have c >= 0
    tmp = genpareto.rvs(skew - 1, size=row_num - len(col0))
    if tmp.size:
        # Rescale generated data to the range of the domain.
        span = tmp.max() - tmp.min()
        if span > 0:
            tmp = ((tmp - tmp.min()) / span) * dom
        else:
            # A single (or constant) draw has no spread to rescale; map it
            # to the lowest domain value instead of dividing by zero.
            tmp = np.zeros_like(tmp)
    col0 = np.concatenate((col0, np.clip(tmp.astype(int), 0, dom - 1)))

    # Generate the second column according to the first.  This stays a
    # per-row draw so the random stream, and therefore the dataset produced
    # for a given seed, is unchanged.
    col1 = [
        c0 if np.random.uniform(0, 1) <= corr else np.random.choice(dom)
        for c0 in col0
    ]

    df = pd.DataFrame(data={'col0': col0, 'col1': col1})

    L.info(f"Dump dataset {dataset} as version {version} to disk")
    df.to_csv(csv_path, index=False)
    df.to_pickle(pkl_path)
    load_table(dataset, version)
    L.info("Finish!")
def gen_testdata(c, s):
    """Build a daily test series with a generalized-Pareto upper tail.

    A normal sample (scale ``s``) is clipped at zero.  The ``q_thresh``
    quantile of the values above 1 becomes the threshold, and every value
    above it is replaced by a generalized-Pareto draw with shape ``c``,
    location equal to the threshold and scale ``s``.  ``n`` and ``q_thresh``
    are taken from the enclosing scope.

    Returns:
        An ``xr.DataArray`` over a ``noleap`` time axis starting 1990-01-01,
        with ``units`` and ``thresh`` attributes.
    """
    values = np.clip(norm.rvs(loc=0, scale=s, size=(n, )), 0, None)
    threshold = np.quantile(values[values > 1], q_thresh)

    in_tail = values > threshold
    values[in_tail] = genpareto.rvs(c,
                                    loc=threshold,
                                    scale=s,
                                    size=values[in_tail].shape)

    time_axis = xr.cftime_range("1990-01-01", periods=n, calendar="noleap")
    return xr.DataArray(
        values,
        dims=("time", ),
        coords={"time": time_axis},
        attrs={
            "units": "mm/day",
            "thresh": threshold
        },
    )
def __init__(self, initialWealth=25, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5, maxWealthM=200, maxRoundsMean=300, maxRoundsSD=25, reseed=True): # store the hyperparameters for passing back into __init__() during resets so the same hyperparameters govern the next game's parameters, as the user expects: TODO: this is boilerplate, is there any more elegant way to do this? self.initialWealth=initialWealth self.edgePriorAlpha=edgePriorAlpha self.edgePriorBeta=edgePriorBeta self.maxWealthAlpha=maxWealthAlpha self.maxWealthM=maxWealthM self.maxRoundsMean=maxRoundsMean self.maxRoundsSD=maxRoundsSD # draw this game's set of parameters: edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta) maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random)) maxRounds = round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)) # add an additional global variable which is the sufficient statistic for the Pareto distribution on wealth cap; # alpha doesn't update, but x_m does, and simply is the highest wealth count we've seen to date: self.maxEverWealth = initialWealth # for the coinflip edge, it is total wins/losses: self.wins = 0 self.losses = 0 # for the number of rounds, we need to remember how many rounds we've played: self.roundsElapsed = 0 # the rest proceeds as before: self.action_space = spaces.Discrete(maxWealth*100) self.observation_space = spaces.Tuple(( spaces.Box(0, maxWealth, shape=[1]), # current wealth spaces.Discrete(maxRounds+1), # rounds elapsed spaces.Discrete(maxRounds+1), # wins spaces.Discrete(maxRounds+1), # losses spaces.Box(0, maxWealth, [1]))) # maximum observed wealth self.reward_range = (0, maxWealth) self.edge = edge self.wealth = initialWealth self.initialWealth = initialWealth self.maxRounds = maxRounds self.rounds = self.maxRounds self.maxWealth = maxWealth if reseed or not hasattr(self, 'np_random') : self._seed()
def poisson_point_process(para,
                          n,
                          no_exceed,
                          load=False,
                          save=False,
                          filename="",
                          name=None):
    """
    Simulates Poisson point process with 1 expected exceedance per block and
    number of blocks equal to number of exceedances.
    ---------------------------------------------------------------------------
    para: [mu, sigma, xi].
    n: Number of observations (int).
    no_exceed: Number of exceedances (int).
    load: If true, the data is read with load_data(name) instead of simulated.
    save: If true, the data is written to "data/<name>.csv".
    filename: Accepted but not used by this function.
    name: Label for the data; also used when loading and saving.
    ---------------------------------------------------------------------------
    Returns: GEVData.
    """
    mu, sigma, xi = para
    if load:
        trunc_data = load_data(name)
    else:
        exceedances = genpareto.rvs(xi, loc=mu, scale=sigma, size=no_exceed)

        # Random positions for the exceedances; every other slot stays 0.
        positions = list(range(n))
        shuffle(positions)
        trunc_data = [0] * n
        for rank, value in enumerate(exceedances):
            trunc_data[positions[rank]] = value
    # NOTE(review): the flattened source does not show whether this save
    # belongs inside the simulate branch above - confirm the nesting.
    if save:
        np.savetxt("data/%s.csv" % name, trunc_data, delimiter=",")
    return GEVData(mu, trunc_data, name=name)
def _simulate_nd(self, n):
    """Draw ``n`` samples from a two-part model split at the threshold ``self.u``.

    Each sample is, with probability ``self.p``, resampled uniformly from the
    stored observations ``self.nd_vals`` that are at or below ``self.u``.
    Otherwise it is drawn from a generalized Pareto distribution with shape
    ``self.xi``, location ``self.u`` and scale ``self.sigma``.

    Args:
        n: Number of samples to generate.

    Returns:
        A 1-D numpy array of length ``n``.
    """
    # NOTE(review): assumes self.nd_vals is a numpy array (it is indexed with
    # a boolean mask) - confirm against the class constructor.
    samples = np.empty((n, ))

    # 1 -> sample comes from the observations below the threshold,
    # 0 -> sample comes from the generalized Pareto tail.
    is_below = np.random.binomial(1, self.p, n)
    n_samples_below = np.sum(is_below)

    # Compare against the threshold attribute.  The previous code reused the
    # name ``u`` for the indicator array and therefore compared the
    # observations with the 0/1 draws instead of the threshold.
    below_obs = self.nd_vals[self.nd_vals <= self.u]
    row_idx = np.random.choice(len(below_obs), size=n_samples_below)
    samples[is_below == 1] = below_obs[row_idx]

    n_samples_tail = n - n_samples_below
    samples[is_below == 0] = gp.rvs(c=self.xi,
                                    loc=self.u,
                                    scale=self.sigma,
                                    size=n_samples_tail)
    return samples
def rvs(self, size, loc=None, scale=None, shape=None):
    """Draw ``size`` generalized-Pareto random variates.

    ``shape``, ``loc`` and ``scale`` are each passed through ``ifnone``
    together with the value of the matching accessor (``self.shape()``,
    ``self.loc()``, ``self.scale()``).
    """
    # NOTE(review): ifnone presumably returns its first argument unless that
    # is None - confirm against its definition.
    c = ifnone(shape, self.shape())
    location = ifnone(loc, self.loc())
    spread = ifnone(scale, self.scale())
    return genpareto.rvs(c, location, spread, size)
import numpy as np
from scipy.stats import genpareto
import matplotlib.pyplot as plt

# Example walk-through of scipy.stats.genpareto for shape parameter c.
fig, ax = plt.subplots(1, 1)

# First four moments of the distribution.
c = 0.1
mean, var, skew, kurt = genpareto.stats(c, moments='mvsk')

# Probability density function between the 1% and 99% quantiles.
x = np.linspace(genpareto.ppf(0.01, c), genpareto.ppf(0.99, c), 100)
ax.plot(x, genpareto.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genpareto pdf')

# The same density from a "frozen" distribution object with c fixed.
rv = genpareto(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# cdf must invert ppf.
vals = genpareto.ppf([0.001, 0.5, 0.999], c)
assert np.allclose([0.001, 0.5, 0.999], genpareto.cdf(vals, c))

# Random sample compared with the density.  ``density=True`` replaces the
# ``normed=True`` keyword, which matplotlib no longer accepts.
r = genpareto.rvs(c, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
eentry3.insert(10, "2")
tk.Button(root5, text='Import Data', command=getfakedata).grid(row=6,
                                                               column=1,
                                                               sticky=tk.W,
                                                               pady=4)
tk.Button(root5, text='Continue', command=root5.destroy).grid(row=6,
                                                              column=0,
                                                              sticky=tk.W,
                                                              pady=4,
                                                              padx=4)
root5.mainloop()

# Draw 1000 generalized-Pareto values in one vectorised call.  The last three
# entries of fakedata are passed as shape, loc and scale, in that order.
testdata = sorted(
    genpareto.rvs(fakedata[-3],
                  loc=fakedata[-2],
                  scale=fakedata[-1],
                  size=1000))

#Shifting Test data from uncalibrated readings... (Negative readings)
#if min(testdata)<0 and fileName != '':
#    utestdata = sorted(testdata)
#    testdata = []
#    mindatapoint = abs(min(utestdata))
#    for data in utestdata:
#        testdata.append(mindatapoint+data)

# Single-argument print(...) runs unchanged on Python 2 and Python 3.
print(
    'Please enter values for Threshold Step Size, Threshold Min Value, and Threshold Max Value'
)
print(
    'Then click "Display Plot", when satisfied click "Continue" and note your selection of Threshold'
)
from scipy.stats import genpareto
import matplotlib.pyplot as plt
import numpy as np
import warnings

# Draw a large generalized-Pareto sample and recover scale and shape from its
# first two sample moments.
warnings.filterwarnings("ignore")

sigma = 1.7  # scale passed to genpareto.rvs
gamma = 0.4  # shape passed to genpareto.rvs
mu = 0  # location passed to genpareto.rvs
sample_size = 10000000
random_numbers = genpareto.rvs(gamma, loc=mu, scale=sigma, size=sample_size)

fig, ax = plt.subplots(1, 1)
ax.hist(random_numbers,
        density=True,
        histtype="stepfilled",
        alpha=0.2,
        label="rnds")
ax.legend(loc="best", frameon=False)

# Use the ndarray reductions: the builtin max()/min() walk all ten million
# elements one by one in the interpreter.
print("maximum: {}".format(random_numbers.max()))
print("minimum: {}".format(random_numbers.min()))

sample_mean = np.mean(random_numbers)
sample_mean_sq = np.mean(random_numbers**2)
sample_variance = np.var(random_numbers)
sample_variance_sq = np.var(random_numbers**2)

# Estimates built from the sample mean m and the mean of squares q, where
# q - m**2 is the sample variance:
#   sigma_est = m * q / (2 * (q - m**2))
#   gamma_est = 1/2 - m**2 / (2 * (q - m**2))
sigma_est = 0.5 * sample_mean * sample_mean_sq / (
    sample_mean_sq - sample_mean**2)  # OK
gamma_est = 0.5 - (sample_mean**2 /
                   (2 * (sample_mean_sq - sample_mean**2)))  # ok
def fit_resample(self):
    """Draw a fresh generalized-Pareto sample and fit the distribution to it.

    The sample is drawn with ``self.shape``, ``self.location``,
    ``self.scale`` and ``self.size``.

    Returns:
        The result of ``genpareto.fit`` on the new sample.
    """
    draw_args = (self.shape, self.location, self.scale, self.size)
    return genpareto.fit(genpareto.rvs(*draw_args))
import numpy as np
from scipy.stats import genpareto
import matplotlib.pyplot as plt

# Example walk-through of scipy.stats.genpareto for shape parameter c.
fig, ax = plt.subplots(1, 1)

# First four moments of the distribution.
c = 0.1
mean, var, skew, kurt = genpareto.stats(c, moments='mvsk')

# Probability density function between the 1% and 99% quantiles.
x = np.linspace(genpareto.ppf(0.01, c), genpareto.ppf(0.99, c), 100)
ax.plot(x, genpareto.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genpareto pdf')

# The same density from a "frozen" distribution object with c fixed.
rv = genpareto(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# cdf must invert ppf.
vals = genpareto.ppf([0.001, 0.5, 0.999], c)
assert np.allclose([0.001, 0.5, 0.999], genpareto.cdf(vals, c))

# Random sample compared with the density.  ``density=True`` replaces the
# ``normed=True`` keyword, which matplotlib no longer accepts.
r = genpareto.rvs(c, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def t_daily_Salas(self):
    """Generate a synthetic rain series and save its event times and bar plot.

    Started by Santiago Cataño, April 25th, 2017, from MatLab.
    Goal: P by seasonal parameters + random Pareto.
    dt = time step in hours, 1 < dt < 24; Dt = time window in hours;
    Dt/dt must be an integer.  Ref: fig. 21, Salas2013MSc.

    Events arrive with Poisson-distributed gaps (``lam=self.eventT``, at
    least one step apart).  Each event gets a generalized-Pareto rain value
    that is rounded to an integer and raised to at least 1; the steps between
    events are filled with zeros.  The series has ``self.n`` steps.

    Side effects:
        * sets ``self.t_ev`` (event times, 1-based) and ``self.Pt``;
        * writes ``doct_rain/t_ev.txt`` and ``doct_rain/P<dt>h.png``.

    Returns:
        ``self.Pt``, the rain series as a numpy array.
    """
    k = -.02 * self.dt + .914  # shape parameter
    sigma = .015 * self.dt**1.706  # passed to genpareto as scale
    theta = -.037 * self.dt + .099  # passed to genpareto as loc

    Pt = []
    self.t_ev = [1]
    n_ev = 0  # index of the current event
    while self.t_ev[n_ev] <= self.n:
        # Poisson arrival of the event and random generalized-Pareto rain
        # value (Salas2013MSc).
        addRain = genpareto.rvs(k, loc=theta, scale=sigma, size=1)
        # [mm]; integers keep the output files compact, and at least 1 so
        # that every event is visible in the integer series.
        addRain = max(1, int(round(addRain[0])))
        Pt.append(addRain)

        t_arriv = max(1, np.random.poisson(lam=self.eventT))
        # Time of the next event to be simulated.
        self.t_ev.append(self.t_ev[n_ev] + t_arriv)

        if t_arriv > 1:
            # Dry steps until the next event, without exceeding the required
            # series length.
            nZeros = min(t_arriv - 1, self.n - self.t_ev[n_ev])
            Pt.extend([0] * int(nZeros))
        n_ev = n_ev + 1

    # Pad with zeros if the series is still shorter than requested.
    if len(Pt) < self.n:
        Pt.extend([0] * (self.n - len(Pt)))
    # Keep only event times that fall inside the series.
    if self.t_ev[-1] > self.n:
        del self.t_ev[-1]
    np.savetxt('doct_rain/t_ev.txt', np.transpose(self.t_ev), fmt='%d')
    self.Pt = np.array(Pt)

    # Use pandas to turn the 1-D array into a dated series.  Whole days are
    # expressed in days; every other step length is expressed in hours,
    # because dividing dt by 24 gives a zero or fractional day count.
    if self.dt > 12 and self.dt % 24 == 0:
        fr = str(int(self.dt // 24)) + 'D'
    else:
        fr = str(self.dt) + 'H'
    dates = pd.date_range('20/2/2018', periods=self.n, freq=fr)
    Pserie = pd.Series(self.Pt, index=dates)

    # One image and file per run, no overlap.
    fig, ax = plt.subplots()
    Pserie.plot(kind='bar')
    fig.autofmt_xdate()
    plt.savefig('doct_rain/P' + str(self.dt) + 'h.png')
    # The original line read ``plt.clf`` without parentheses and therefore
    # did nothing; close the figure so repeated runs do not pile up figures.
    plt.close(fig)
    return self.Pt