def sample(self, sampleShape):
    """
    Draw random values from the distribution.

    The draw uses ``scipy.stats.genextreme`` with ``c=-self.shapeParam``,
    ``loc=self.locParam`` and ``scale=self.scaleParam``.

    :param sampleShape: shape of the sample, forwarded as ``size`` to
        ``genextreme.rvs``
    :return: numpy array of shape 'sampleShape' holding the sampled data
    """

    def _draw(size):
        # Note the sign flip: scipy's shape argument is passed as the
        # negative of this object's shape parameter.
        return genextreme.rvs(c=-self.shapeParam,
                              loc=self.locParam,
                              scale=self.scaleParam,
                              size=size)

    def _outside_support(values):
        # True where 1 + shape * (x - loc) / scale <= 0, i.e. where the
        # point has zero probability of occurrence.
        return 1 + self.shapeParam * (
            values - self.locParam) / self.scaleParam <= 0

    # According to the original author, genextreme.rvs sometimes outputs
    # points that have zero probability of occurrence; such points are
    # redrawn until none remain.
    data = _draw(sampleShape)
    rejected = _outside_support(data)
    while np.any(rejected):
        positions = np.where(rejected)
        data[positions] = _draw(data[positions].shape)
        rejected = _outside_support(data)
    return data
def test_projection(sample, _NNODES, _NRANKS, _NITER, _PROJ_NNODES, p):
    """Bootstrap projected workload totals for a doubling series of node counts.

    Starting at ``_NNODES`` and doubling while the node count is at most
    ``_PROJ_NNODES + 1``, the function runs 50 bootstrap replicates per node
    count. Each replicate resamples the flattened input with replacement,
    folds it into rows of ``_NITER`` columns, takes the column-wise maxima,
    fits them with ``pwm_fit`` and ``mom_fit``, simulates
    ``nodes * _NITER`` draws from each fitted ``genextreme`` and records the
    sum of the simulated column-wise maxima.

    :param sample: 2-D array; it is flattened to
        ``sample.shape[0] * sample.shape[1]`` values
    :param _NNODES: starting node count
    :param _NRANKS: multiplier applied to node counts for the x-values and
        for the arguments handed to ``em_pwm`` / ``emv``
    :param _NITER: number of columns used when folding samples
    :param _PROJ_NNODES: the projection stops once the node count exceeds
        ``_PROJ_NNODES + 1``
    :param p: not used by this function
    :return: ``(x_values, totals)`` where ``totals`` holds the
        ``mom_fit``-based totals followed by the ``pwm_fit``-based totals and
        ``x_values`` holds the matching ``nodes * _NRANKS`` values, repeated
        once for each half
    """
    flat = np.reshape(sample, sample.shape[0] * sample.shape[1])

    def _simulated_total(fit, draws):
        # Sample from the fitted distribution, fold into rows of _NITER
        # columns and sum the per-column maxima.
        simulated = genextreme.rvs(fit[0], loc=fit[1], scale=fit[2],
                                   size=draws)
        return np.sum(np.amax(np.reshape(simulated, (-1, _NITER)), axis=0))

    # These three are filled in but are not part of the return value.
    observed_totals = []
    pwm_estimates = []
    emv_estimates = []

    pwm_totals = []
    mom_totals = []
    rank_counts = []

    nodes = _NNODES
    while nodes <= _PROJ_NNODES + 1:
        for _ in range(50):
            # One bootstrap replicate: resample with replacement.
            resample = np.random.choice(flat, flat.shape[0], replace=True)
            maxima = np.amax(np.reshape(resample, (-1, _NITER)), axis=0)
            observed_totals.append(np.sum(maxima))

            pwm_estimates.append(
                em_pwm(maxima, _NNODES * _NRANKS, nodes * _NRANKS) * _NITER)
            pwm_totals.append(
                _simulated_total(pwm_fit(maxima), nodes * _NITER))

            emv_estimates.append(
                emv(maxima, _NNODES * _NRANKS, nodes * _NRANKS) * _NITER)
            mom_totals.append(
                _simulated_total(mom_fit(maxima), nodes * _NITER))

            rank_counts.append(nodes * _NRANKS)
        nodes *= 2

    return rank_counts + rank_counts, mom_totals + pwm_totals
def SimulateSample(self, n=9, m=1000):
    """Simulate and plot the sampling distribution of the sample mean.

    Draws ``m`` samples of size ``n`` from ``genextreme`` using
    ``self.shape``, ``self.loc`` and ``self.scale``, and collects the mean of
    each sample. It then prints ``self.RMSE(means, self.loc)`` and the 5th
    and 95th percentiles of the means (the printed labels are Portuguese for
    "standard error" and "confidence interval"), marks both percentiles with
    vertical lines and plots the CDF of the means.

    n: sample size
    m: number of iterations
    """

    def _mark(position, height=1):
        # Light-grey vertical line from 0 up to `height` at `position`.
        thinkplot.Plot([position, position], [0, height],
                       color='0.8', linewidth=3)

    means = [
        np.mean(genextreme.rvs(c=self.shape, loc=self.loc,
                               scale=self.scale, size=n))
        for _ in range(m)
    ]

    stderr = self.RMSE(means, self.loc)
    print('Erro Padrão', stderr)

    cdf = thinkstats2.Cdf(means)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('Intervalo de Confiança: ', ci)
    for bound in ci:
        _mark(bound)

    # Plot the CDF of the simulated means.
    thinkplot.Cdf(cdf)
def get_rvs_data(data,
                 validation_window,
                 ticks=(0.7, 0.8, 0.9, 1),
                 interpolator=interpolate.InterpolatedUnivariateSpline,
                 **kwargs):
    """Draw random variates from a ``genextreme`` with forecast parameters.

    The shape, location and scale come from ``forecast_params``, which
    receives ``data``, ``ticks``, ``interpolator`` and any extra keyword
    arguments. A scale that is not greater than zero is replaced by 0 before
    sampling.

    :param data: passed through to ``forecast_params``
    :param validation_window: ``size`` argument for ``genextreme.rvs``
    :param ticks: passed through to ``forecast_params``
    :param interpolator: passed through to ``forecast_params``
    :return: the values returned by ``genextreme.rvs``
    """
    shape_arg, location, raw_scale = forecast_params(
        data=data, ticks=ticks, interpolator=interpolator, **kwargs)
    # Clamp at zero so a negative forecast scale never reaches the sampler.
    clamped_scale = raw_scale if raw_scale > 0 else 0
    return genextreme.rvs(c=shape_arg, loc=location, scale=clamped_scale,
                          size=validation_window)
def datasets():
    """Return two random samples of 1000 points each as pandas Series.

    The first Series is drawn from ``genextreme`` with ``c=-0.2``; the second
    is drawn from ``expon``. Both use the default location and scale.
    """
    n_points = 1000
    gev_draws = genextreme.rvs(c=-0.2, size=n_points)
    expon_draws = expon.rvs(size=n_points)
    return pd.Series(gev_draws), pd.Series(expon_draws)
def gev_project(params, k, samples=1000):
    """Fit a distribution to maxima of blocks drawn from a given GEV.

    Generates ``k * samples`` blocks of ``k`` ``genextreme`` draws each,
    keeps the maximum of every block and returns ``gevfit.fit`` applied to
    those maxima.

    :param params: ``(shape, loc, scale)`` passed positionally to
        ``genextreme.rvs``
    :param k: number of draws per block; also multiplies ``samples`` to give
        the number of blocks
    :param samples: multiplier for the number of blocks
    :return: whatever ``gevfit.fit`` returns for the list of block maxima
    """
    shape, loc, scale = params
    block_maxima = []
    for _ in range(k * samples):
        block = genextreme.rvs(shape, loc, scale, k)
        block_maxima.append(max(block))
    return gevfit.fit(block_maxima)
# Fragment: for each node count i (from _NNODES up to _PROJ_NNODES in steps of
# _NNODES) run 30 random permutations of `sone`, record the sum of column-wise
# maxima, the scaled `emv` estimate, and the sum of maxima simulated from the
# `mom_fit` parameters; then summarise `lblock`.
# NOTE(review): `lblock`, `mblock`, `sone`, `p` and the _N* names come from
# code outside this fragment, and the nesting below was reconstructed from a
# whitespace-collapsed source. Confirm against the original file.
emv_block=[]
x=[]
for i in range(_NNODES,_PROJ_NNODES+1,_NNODES):
    print(i)
    for j in range(30):
        # shuffle the data and fold it into rows of _NITER columns
        stemp=np.random.permutation(sone)
        sblock=np.reshape(stemp, (-1,_NITER))
        # column-wise maxima of the shuffled data
        mx1=np.amax(sblock, axis=0)
        lblock.append(np.sum(mx1))
        emv_block.append(emv(mx1, _NNODES, i)*_NITER)
        # fit the maxima, then simulate i*_NITER draws from the fitted genextreme
        momfit=mom_fit(mx1)
        r = genextreme.rvs(momfit[0], loc=momfit[1], scale=momfit[2], size=i*_NITER)
        # reshape and take the max per iteration
        momblock=np.reshape(r, (-1,_NITER))
        mx2=np.amax(momblock, axis=0)
        # append the sum of maxima
        mblock.append(np.sum(mx2))
        x.append(i*_NRANKS)
arr_block=np.array(lblock)
# get confidence intervals using quantiles
lowCI=np.percentile(arr_block, p)
#highCI=np.percentile(arr_block, 100-p)
# mean of the recorded sums
block_workload=sum(lblock)/len(lblock)
print("one workload with B {}".format(block_workload))
# Example fragment: plot the ``genextreme`` pdf for shape ``c`` on the existing
# axes ``ax``, compare it with the frozen distribution and with a histogram of
# random draws. ``c``, ``ax`` and ``plt`` are expected to be defined earlier.

# Display the probability density function (``pdf``):
x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6,
        label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))  # True

# Generate random numbers:
r = genextreme.rvs(c, size=1000)

# And compare the histogram.
# ``density=True`` is used because the older ``normed`` keyword was removed
# from Matplotlib's ``hist`` and raises an error on current versions.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def rvs(self, size, c=None, loc=None, scale=None):
    """Draw ``size`` random variates from ``genextreme``.

    Each of ``c``, ``loc`` and ``scale`` is combined with this object's own
    value through ``ifnone``: ``self.c``, ``self.loc()`` and ``self.scale()``
    respectively. The instance values are always evaluated, even when an
    explicit argument is supplied.

    :param size: forwarded as the ``size`` argument of ``genextreme.rvs``
    :param c: optional shape override
    :param loc: optional location override
    :param scale: optional scale override
    :return: the values returned by ``genextreme.rvs``
    """
    shape_value = ifnone(c, self.c)
    loc_value = ifnone(loc, self.loc())
    scale_value = ifnone(scale, self.scale())
    return genextreme.rvs(shape_value, loc_value, scale_value, size)
import numpy as np from scipy.optimize import minimize from scipy.stats import genextreme n = 3 ns = 2 p = 2 true_theta = np.array([100,30,0.1], dtype = float) true_beta = np.repeat(0.0,p) xlist = [] zlist = [] for i in range(ns): z = np.random.normal(size = n*p) z = z.reshape(n,p) x = genextreme.rvs(loc = true_theta[0], scale = true_theta[1], c = true_theta[2], size=n) xlist.append(x) zlist.append(z) def gevreg_m(xlist, zlist, lambda =0 ): p = zlist[1].shape[1] ns = len(xlist) tvec = np.repeat(0.0, ns*3+p) def lgev(x, loc = 0, scale = 1, shape = 0): if (scale <= 0) : return ( -1e+6) x = (x - loc)/scale if (shape == 0):
from scipy.stats import genextreme

# Draw 5000 variates from genextreme and print them one per line.
# The positional parameters are, in order, shape (xi) = 0.5, location
# (mu) = 0.2 and scale (sigma) = 0.3.
r = genextreme.rvs(0.5, 0.2, 0.3, size=5000)
print(*r, sep="\n")

# Recorded result for this output, presumably from an external fitting run,
# in the order mu, sigma, xi:
#
#   .1991  .2954  .5047  CONVGD
def gev_resample_project(samples, nsamples, k):
    """Simulate block maxima from a distribution fitted to ``samples``.

    ``gevfit.fit(samples)`` supplies ``(shape, loc, scale)``. The function
    then draws ``nsamples`` blocks of ``k`` ``genextreme`` values each and
    keeps the maximum of every block.

    :param samples: data handed to ``gevfit.fit``
    :param nsamples: number of block maxima to generate
    :param k: number of draws per block
    :return: list of ``nsamples`` block maxima
    """
    shape, loc, scale = gevfit.fit(samples)
    block_maxima = []
    for _ in range(nsamples):
        block = genextreme.rvs(shape, loc, scale, k)
        block_maxima.append(max(block))
    return block_maxima