def draw_pareto_changing_b(b_set, num_classes, max=1000, min=1, head=0.0, tail=0.99, save_name='./pareto_ref.jpg'):
    """Plot one Pareto-shaped reference curve per shape value and save the figure.

    For every ``b`` in ``b_set`` the Pareto pdf is sampled on an evenly spaced
    grid between the ``head`` and ``tail`` quantiles and rescaled as
    ``pdf * (max - min) / b + min``. All curves share one x axis that runs
    from 0 to ``num_classes``.

    Args:
        b_set: Iterable of Pareto shape parameters, one curve each.
        num_classes: Upper end of the x axis; each curve is drawn with
            ``10 * num_classes`` points.
        max: Upper value used in the rescaling.
        min: Offset added to every rescaled value.
        head: Quantile at which the sampled support starts.
        tail: Quantile at which the sampled support ends.
        save_name: Passed unchanged to the ``_savefig`` helper.
    """
    n_points = 10 * num_classes
    figure, axes = plt.subplots(1, 1)
    x_axis = np.linspace(0, num_classes, n_points)

    for shape in b_set:
        frozen = pareto(shape)
        support_lo = pareto.ppf(head, shape)
        support_hi = pareto.ppf(tail, shape)
        support = np.linspace(support_lo, support_hi, n_points)
        curve = frozen.pdf(support) * (max - min) / shape + min
        axes.plot(x_axis, curve, label='alpha={}'.format(shape))

    plt.legend()
    axes.set_xlabel('sorted class index')
    axes.set_ylabel('sample numbers')
    _savefig(save_name)
def pareto_dist(b, num_classes, max, min=1, tail=0.99, display=False):
    """Generate a Pareto-shaped reference distribution of per-class counts.

    The Pareto pdf with shape ``b`` is sampled at ``num_classes`` evenly
    spaced points between the 0-quantile and the ``tail`` quantile, rescaled
    as ``pdf * (max - min) / b + min`` and truncated to integers.

    Args:
        b: Pareto shape parameter (may be a non-integer).
        num_classes: Number of classes, i.e. length of the returned array.
        max: Upper value used in the rescaling.
        min: Offset added to every rescaled value.
        tail: Quantile at which the sampled support ends.
        display: When true, also draw a bar chart of the counts and save it
            under ``./data/longtail/``.

    Returns:
        Integer NumPy array of length ``num_classes``.
    """
    rv = pareto(b)
    classes_x = np.linspace(pareto.ppf(0.0, b), pareto.ppf(tail, b), num_classes)
    dist = rv.pdf(classes_x) * (max - min) / b + min
    dist = dist.astype(int)

    if display:
        fig, ax = plt.subplots(1, 1)
        ax.bar(range(num_classes), dist)
        # '{}' instead of '{:d}' for b/max/min: the shape parameter is usually
        # a float, and '{:d}' raises ValueError for floats. For integer
        # arguments the resulting file name is unchanged.
        plt.savefig('./data/longtail/refer_num{:d}_b{}_max{}-min{}.jpg'.format(num_classes, b, max, min))
    return dist
def draw_pareto_changing_maxmin(b, num_classes, max=(1000, 500), min=(50, 10), head=0.0, tail=0.99, save_name='./pareto_ref.jpg'):
    """Plot Pareto-shaped reference curves for several (max, min) pairs and save the figure.

    The Pareto pdf with shape ``b`` is sampled between the ``head`` and
    ``tail`` quantiles and divided by ``b``. Each ``(max_c, min_c)`` pair taken
    from ``zip(max, min)`` then rescales that curve as
    ``curve * (max_c - min_c) + min_c``. Curves are labelled by their index
    in the pairing.

    Args:
        b: Pareto shape parameter shared by all curves.
        num_classes: Upper end of the x axis; each curve is drawn with
            ``10 * num_classes`` points.
        max: Sequence of upper values, paired element-wise with ``min``.
        min: Sequence of offsets, paired element-wise with ``max``.
        head: Quantile at which the sampled support starts.
        tail: Quantile at which the sampled support ends.
        save_name: Passed unchanged to the ``_savefig`` helper.
    """
    n_points = 10 * num_classes
    fig, ax = plt.subplots(1, 1)
    classes = np.linspace(0, num_classes, n_points)

    # The sampled support and the pdf depend only on b/head/tail, not on the
    # (max, min) pair, so they are computed once instead of per curve.
    classes_x = np.linspace(pareto.ppf(head, b), pareto.ppf(tail, b), n_points)
    unit_curve = pareto(b).pdf(classes_x) / b

    for i, (max_c, min_c) in enumerate(zip(max, min)):
        dist = unit_curve * (max_c - min_c) + min_c
        ax.plot(classes, dist, label='{}'.format(i))

    plt.legend()
    ax.set_ylim(0, np.max(max) + 50)
    ax.set_xlabel('sorted class index')
    ax.set_ylabel('sample numbers')
    _savefig(save_name)
def VaR_alpha(alpha, parametros):
    """Return the ``alpha``-quantile (Value at Risk) of a named distribution.

    Args:
        alpha: Probability level(s) handed to the distribution's ``ppf``.
        parametros: Sequence ``(name, p1, p2)``. ``name`` selects the
            distribution; ``p1`` and ``p2`` are forwarded as follows:

            * ``"gennormal"``: ``gennorm.ppf(alpha, p1, p2)``
            * ``"normal"``: ``norm.ppf(alpha, p1, p2)``
            * ``"gamma"``: ``gamma.ppf(alpha, p1, scale=p2)``
            * ``"pareto"``: ``pareto.ppf(alpha, b=p1, scale=p2)``
            * ``"weibull"``: ``weibull_min.ppf(alpha, c=p1, scale=p2)``
            * anything else (intended: ``"lognorm"``):
              ``lognorm.ppf(alpha, s=p1, scale=p2)``

    Returns:
        The quantile value(s) computed by ``scipy.stats``.
    """
    from scipy.stats import gamma, gennorm, lognorm, norm, pareto, weibull_min

    nombre = parametros[0]
    p1, p2 = parametros[1], parametros[2]

    if nombre == "gennormal":
        return gennorm.ppf(alpha, p1, p2)
    if nombre == "normal":
        return norm.ppf(alpha, p1, p2)
    if nombre == "gamma":
        return gamma.ppf(alpha, p1, scale=p2)
    if nombre == "pareto":
        return pareto.ppf(q=alpha, b=p1, scale=p2)
    if nombre == "weibull":
        # scipy.stats has no ``weibull``; the standard Weibull distribution is
        # ``weibull_min`` and its shape keyword is ``c``.
        return weibull_min.ppf(q=alpha, c=p1, scale=p2)
    # Fallback kept from the original dispatch: every other name is treated
    # as "lognorm". The shape keyword of lognorm is ``s`` (not ``b``).
    return lognorm.ppf(q=alpha, s=p1, scale=p2)
U = np.random.uniform(0,1,n) res = beta*(U**(-1/k)-1) mean = (k/(k-1))*beta-1 variance = (k/((k-1)**2*(k-2)))*beta**2 return res, mean, variance n = 10000 beta = 1 k1 = 2.05; k2 = 2.5; k3 = 3; k4 = 4 #First k res31, mean1, var1 = paretobay(beta,k1,n) anamean1 = np.mean(res31) anavar1 = np.var(res31) x1 = np.linspace(pareto.ppf(0.01, k1),pareto.ppf(0.9999,k1),100) plt.figure() plt.hist(res31,align='mid',color='tan',edgecolor='moccasin',bins=20,density=True,stacked=True) xmin, xmax = plt.xlim() ymin, ymax = plt.ylim() plt.plot(x1-1, pareto.pdf(x1, k1),'g-', lw=2,alpha=0.6) plt.ylim(ymin,ymax) plt.title("Pareto Distributed Histogram (k=2.05)") plt.xlabel("Classes") plt.ylabel("Density") plt.show print('----Pareto with K = 2.05----') print('The theoretical mean is: {0}'.format(mean1)) print('The theoretical variance is: {0}'.format(var1)) print('The analytical mean is: {0}'.format(anamean1))
# In[128]:

# Choose the distribution parameters.
k = 10
x_m = 1

# In[129]:

# Generate a sample of size 1000 from the distribution.
sampleRange = paretoF(1000)

# Plot a histogram of the sample and draw the theoretical probability
# density of the random variable on top of it.
# ``density=True`` replaces the ``normed=True`` keyword, which Matplotlib
# has removed.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# Theoretical probability density of the random variable, drawn between the
# 1% and 99% quantiles.
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')

# In[150]:

# Disabled experiments, kept for reference:
# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print values
# plt.hist(values.mean(axis=1), normed=True)

# Means of 20 independent samples of size 1000 (``range`` instead of the
# Python-2-only ``xrange``).
m = [np.mean(pareto.rvs(k, size=1000)) for _ in range(20)]
# plt.hist(m, normed=True, alpha=0.5, label='hist samples')
# plt.xlabel('Confidence intervals values for the variance')
# plt.yticks([])
# plt.legend()
# plt.show()

# histogram 3 for Pareto
# For each shape value, collect the moments returned by funcPareto.pareto
# next to the mean/variance measured on the generated sample, then tabulate.
k = [2.05, 2.5, 3, 4]
mean_list, var_list = [], []
E_list, V_list = [], []
dic = {"k:2.05": [], "k:2.5": [], "k:3": [], "k:4": []}

for i in k:
    # U is not used below; the draw is kept because it advances NumPy's
    # global random state.
    U = np.random.uniform(0.0, 0.1, 10000)
    par1, E, Var = funcPareto.pareto(i, 10000)
    ppf = np.linspace(pareto.ppf(0.01, i), pareto.ppf(0.99, i), 100)
    pdf = pareto.pdf(ppf, i)
    # comparison(par1, ppf-1, pdf, "Pareto comparison for k = {0}".format(i))
    E_list.append(E)
    V_list.append(Var)
    mean_list.append(np.mean(par1))
    var_list.append(np.var(par1))

# One column per shape value, rows in the order of the index labels below.
# NOTE(review): the rows labelled "*_analytical" hold the sample statistics
# (np.mean / np.var), while "Mean"/"Variance" hold the values returned by
# funcPareto.pareto - confirm the labels are as intended.
for label, row in zip(("k:2.05", "k:2.5", "k:3", "k:4"),
                      zip(E_list, V_list, mean_list, var_list)):
    dic[label] = list(row)

df = pd.DataFrame(dic, index=["Mean", "Variance", "Mean_analytical", "Variance_analytical"])
print(df)
def __init__(self, seed, speed, nr_samples, interval):
    """Precompute running-mean estimates of Pareto samples and build the figure.

    Draws ``nr_samples`` values as ``np.random.pareto(3, nr_samples) + 1``,
    computes the mean and a 1.96-standard-error half-width for every prefix
    length in ``range(2, nr_samples, interval)``, and creates a three-panel
    figure (history, bar with error bar, pdf) before initialising the
    ``TimedAnimation`` base class.

    Args:
        seed: Seed passed to ``np.random.seed``.
        speed: Passed to ``TimedAnimation.__init__`` as ``interval``.
        nr_samples: Number of samples to draw; also the x limit of the
            history plot.
        interval: Step between consecutive prefix lengths.
    """
    np.random.seed(seed)
    b = 3
    self.samples = (np.random.pareto(b, nr_samples) + 1)
    # Only the mean is used; the other moments are requested as before.
    mean, _var, _skew, _kurt = pareto.stats(b, moments='mvsk')
    self.gt_mean = mean

    # Running estimate and confidence half-width for each prefix length.
    self.y_values = []
    self.confidence = []
    self.x_values = range(2, nr_samples, interval)
    for i in self.x_values:
        s = self.samples[:i]
        self.y_values.append(np.mean(s))
        self.confidence.append((np.std(s) / math.sqrt(len(s))) * 1.96)
    self.y_values = np.array(self.y_values)
    self.confidence = np.array(self.confidence)

    fig = plt.figure(figsize=(10, 10))
    self.ax1 = fig.add_subplot(2, 2, (1, 2))
    self.ax2 = fig.add_subplot(2, 2, 3)
    self.ax3 = fig.add_subplot(2, 2, 4)

    # history plot
    self.ax1.set_title('dancing bar history')
    self.ax1.set_xlabel('iteration')
    self.ax1.set_ylabel('estimated mean')
    self.ax1.set_xlim(0, nr_samples)
    self.ax1.set_ylim(np.min(self.y_values - self.confidence),
                      np.max(self.y_values + self.confidence))
    self.ax1_primitives = []
    # ``closed`` is passed by keyword: newer Matplotlib releases no longer
    # accept it positionally.
    band = Polygon(self._history_polygon_xy(1), closed=True, alpha=0.4, color='blue')
    self.ax1_primitives.append(band)
    self.ax1.add_patch(band)
    history_line = Line2D([], [], color='blue')
    self.ax1_primitives.append(history_line)
    self.ax1.add_line(history_line)
    self.ax1.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)

    # bar plot
    self.ax2.set_title('dancing bar')
    self.ax2.set_ylabel('avg sales')
    self.ax2.set_xlim(-0.5, 1)
    self.ax2.set_xticks([0.25])
    self.ax2.set_xticklabels(['department XYZ'])
    self.ax2.set_ylim(0, np.max(self.y_values + self.confidence))
    self.ax2_primitives = []
    bar = Rectangle((0, 0), 0.5, self.y_values[1], alpha=0.4, color='blue')
    self.ax2_primitives.append(bar)
    self.ax2.add_patch(bar)
    self.ax2.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)
    error_bar = Line2D(
        [0.25, 0.25],
        [
            self.y_values[1] - self.confidence[1],
            self.y_values[1] + self.confidence[1],
        ],
        color='black',
    )
    self.ax2_primitives.append(error_bar)
    self.ax2.add_line(error_bar)

    # pdf plot
    self.ax3.set_title('pareto pdf')
    x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
    # NOTE(review): the curve drawn is pdf + 1, i.e. the density shifted up by
    # one; left unchanged here - confirm whether the offset is intended.
    self.ax3.plot(x, pareto.pdf(x, b) + 1, 'blue', lw=1, alpha=0.6)

    animation.TimedAnimation.__init__(self, fig, interval=speed, blit=True, repeat=False)
from scipy.stats import pareto
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# First four moments (mean, variance, skewness, kurtosis) for shape b.
b = 2.62
mean, var, skew, kurt = pareto.stats(b, moments='mvsk')

# Plot the probability density function (``pdf``) between the 1% and 99%
# quantiles.
x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
ax.plot(x, pareto.pdf(x, b),
        'r-', lw=5, alpha=0.6, label='pareto pdf')

# Calling the distribution object fixes its shape, location and scale
# parameters and returns a "frozen" RV object; draw the frozen ``pdf`` over
# the same grid.
rv = pareto(b)
ax.plot(x, rv.pdf(x),
        'k-', lw=2, label='frozen pdf')

# Round-trip check: ``cdf`` applied to ``ppf`` values should give back the
# probabilities (the result of allclose is expected to be True).
vals = pareto.ppf([0.001, 0.5, 0.999], b)
np.allclose([0.001, 0.5, 0.999], pareto.cdf(vals, b))

# Generate random numbers:
# get_ipython().magic(u'matplotlib inline') # In[43]: # Выбор параметров для распределения k = 10 #Сгенерируйте из него выборку объёма 1000 sampleRange = pareto.rvs(k, size=1000) #Постройте гистограмму выборки и нарисуйте поверх неё теоретическую плотность распределения вашей случайной величины. plt.hist(sampleRange, normed=True, bins=20, alpha=0.5, label='hist samples pareto') plt.ylabel('number of samples') plt.xlabel('$x$') #теоретическая плотность распределения случайной величины left = pareto.ppf(0.01, k) right = pareto.ppf(0.99, k) x = np.linspace(left, right, 100) plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf') plt.legend(loc='best') # In[57]: # функция построения гистограммы распределений выборочных средних # и плотности соответствующего нормального распределения # sizeSamples - выбороки объёма n def paretoF(sizeSamples, Ex, Dx): n = sizeSamples #генерация выборок values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])