Beispiel #1
0
def draw_pareto_changing_b(b_set, num_classes, max=1000, min=1, head=0.0, tail=0.99, save_name='./pareto_ref.jpg'):
    """Plot one Pareto-shaped reference curve per shape value in ``b_set``.

    For every shape ``b`` the Pareto pdf is evaluated on an evenly spaced
    grid between the ``head`` and ``tail`` quantiles, rescaled as
    ``pdf * (max - min) / b + min`` and drawn against a common class axis
    running from 0 to ``num_classes``.

    Args:
        b_set: Iterable of Pareto shape parameters, one curve each.
        num_classes: Upper end of the x axis; each curve uses
            ``10 * num_classes`` points.
        max: Upper value used in the rescaling.
        min: Lower value (offset) used in the rescaling.
        head: Quantile at which the sampling grid starts.
        tail: Quantile at which the sampling grid ends.
        save_name: Path handed to ``_savefig``.
    """
    n_points = 10 * num_classes
    spread = max - min

    fig, ax = plt.subplots(1, 1)
    x_axis = np.linspace(0, num_classes, n_points)

    for shape in b_set:
        frozen = pareto(shape)
        grid_start = pareto.ppf(head, shape)
        grid_stop = pareto.ppf(tail, shape)
        support = np.linspace(grid_start, grid_stop, n_points)
        curve = frozen.pdf(support) * spread / shape + min
        ax.plot(x_axis, curve, label='alpha={}'.format(shape))
        plt.legend()

    ax.set_xlabel('sorted class index')
    ax.set_ylabel('sample numbers')

    _savefig(save_name)
Beispiel #2
0
def pareto_dist(b, num_classes, max, min=1, tail=0.99, display=False):
    """Generate a Pareto-shaped reference distribution of per-class counts.

    The Pareto pdf with shape ``b`` is sampled at ``num_classes`` evenly
    spaced points between the 0-quantile and the ``tail``-quantile, rescaled
    as ``pdf * (max - min) / b + min`` and truncated to integers.

    Args:
        b: Pareto shape parameter (int or float).
        num_classes: Number of classes, i.e. length of the returned array.
        max: Upper value used in the rescaling (value of the first class).
        min: Lower value (offset) used in the rescaling.
        tail: Quantile at which the sampling grid ends.
        display: If true, additionally save a bar chart of the result under
            ``./data/longtail/``.

    Returns:
        Integer ``numpy`` array of length ``num_classes``.
    """
    rv = pareto(b)
    classes_x = np.linspace(rv.ppf(0.0), rv.ppf(tail), num_classes)
    dist = (rv.pdf(classes_x) * (max - min) / b + min).astype(int)
    if display:
        fig, ax = plt.subplots(1, 1)
        ax.bar(range(num_classes), dist)
        # '{}' instead of '{:d}' for b/max/min: '{:d}' raises ValueError for
        # floats, and the shape parameter is commonly non-integer. Integer
        # arguments still produce exactly the same file name as before.
        fig.savefig('./data/longtail/refer_num{:d}_b{}_max{}-min{}.jpg'.format(num_classes, b, max, min))
    return dist
Beispiel #3
0
def draw_pareto_changing_maxmin(b, num_classes, max=(1000, 500), min=(50, 10), head=0.0, tail=0.99, save_name='./pareto_ref.jpg'):
    """Plot Pareto reference curves for several ``(max, min)`` rescalings.

    The Pareto pdf with fixed shape ``b`` is evaluated once on an evenly
    spaced grid between the ``head`` and ``tail`` quantiles. Each pair
    ``(max_c, min_c)`` taken from ``max`` and ``min`` then yields one curve
    ``pdf / b * (max_c - min_c) + min_c``, labelled by its index.

    Args:
        b: Pareto shape parameter shared by all curves.
        num_classes: Upper end of the x axis; curves use
            ``10 * num_classes`` points.
        max: Sequence of upper values, one per curve.
        min: Sequence of lower values (offsets), paired with ``max``.
        head: Quantile at which the sampling grid starts.
        tail: Quantile at which the sampling grid ends.
        save_name: Path handed to ``_savefig``.
    """
    fig, ax = plt.subplots(1, 1)
    n_points = 10 * num_classes
    classes = np.linspace(0, num_classes, n_points)

    # The shape is the same for every curve, so the frozen distribution, the
    # sampling grid and the normalised pdf are computed once, not per pair.
    rv = pareto(b)
    classes_x = np.linspace(rv.ppf(head), rv.ppf(tail), n_points)
    unit_curve = rv.pdf(classes_x) / b

    bounds = list(zip(max, min))
    for i, (max_c, min_c) in enumerate(bounds):
        ax.plot(classes, unit_curve * (max_c - min_c) + min_c, label='{}'.format(i))
    # Build the legend once; skip it when nothing was drawn.
    if bounds:
        ax.legend()

    ax.set_ylim(0, np.max(max) + 50)
    ax.set_xlabel('sorted class index')
    ax.set_ylabel('sample numbers')

    _savefig(save_name)
def VaR_alpha(alpha, parametros):
    """Return the ``alpha``-quantile (Value at Risk) of a fitted distribution.

    Args:
        alpha: Probability level(s) passed to the distribution's ``ppf``.
        parametros: Sequence ``(name, p1, p2)`` selecting the distribution
            and its two parameters:

            * ``"gennormal"`` -- ``gennorm`` with shape ``p1`` and loc ``p2``
            * ``"normal"``    -- ``norm`` with loc ``p1`` and scale ``p2``
            * ``"gamma"``     -- ``gamma`` with shape ``p1`` and scale ``p2``
            * ``"pareto"``    -- ``pareto`` with shape ``p1`` and scale ``p2``
            * ``"weibull"``   -- ``weibull_min`` with shape ``p1`` and scale ``p2``
            * anything else (intended: ``"lognorm"``) -- ``lognorm`` with
              shape ``p1`` and scale ``p2``

    Returns:
        The quantile value(s) as returned by the scipy ``ppf`` call.
    """
    from scipy import stats

    name, p1, p2 = parametros[0], parametros[1], parametros[2]

    if name == "gennormal":
        return stats.gennorm.ppf(alpha, p1, p2)
    if name == "normal":
        return stats.norm.ppf(alpha, p1, p2)
    if name == "gamma":
        return stats.gamma.ppf(alpha, p1, scale=p2)
    if name == "pareto":
        return stats.pareto.ppf(q=alpha, b=p1, scale=p2)
    if name == "weibull":
        # scipy.stats has no ``weibull``; the standard (minimum) Weibull is
        # ``weibull_min`` and its shape parameter is named ``c``.
        return stats.weibull_min.ppf(q=alpha, c=p1, scale=p2)
    # Fallback kept from the original: every other name is treated as
    # log-normal. The shape parameter of ``lognorm`` is ``s`` (not ``b``).
    return stats.lognorm.ppf(q=alpha, s=p1, scale=p2)
Beispiel #5
0
    U = np.random.uniform(0,1,n)
    res = beta*(U**(-1/k)-1)
    mean = (k/(k-1))*beta-1
    variance = (k/((k-1)**2*(k-2)))*beta**2
    return res, mean, variance

# Simulation settings: sample size, scale and the shape parameters to compare.
n = 10000
beta = 1
k1, k2, k3, k4 = 2.05, 2.5, 3, 4

# First k: draw the sample and compute its empirical mean and variance.
res31, mean1, var1 = paretobay(beta, k1, n)
anamean1 = np.mean(res31)
anavar1 = np.var(res31)

# Grid on which the reference scipy Pareto density is evaluated.
x1 = np.linspace(pareto.ppf(0.01, k1), pareto.ppf(0.9999, k1), 100)

plt.figure()
plt.hist(res31, align='mid', color='tan', edgecolor='moccasin', bins=20, density=True, stacked=True)
# Record the histogram's axis limits; the y-limits are restored below so the
# pdf overlay cannot rescale the view.
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
# NOTE(review): the -1 shift presumably aligns scipy's Pareto with the
# paretobay samples -- confirm against paretobay.
plt.plot(x1 - 1, pareto.pdf(x1, k1), 'g-', lw=2, alpha=0.6)
plt.ylim(ymin, ymax)
plt.title("Pareto Distributed Histogram (k=2.05)")
plt.xlabel("Classes")
plt.ylabel("Density")
# ``plt.show`` without parentheses only referenced the function and never
# displayed the figure.
plt.show()
print('----Pareto with K = 2.05----')
print('The theoretical mean is: {0}'.format(mean1))
print('The theoretical variance is: {0}'.format(var1))
# The value printed here is the sample mean computed with np.mean above.
print('The analytical mean is: {0}'.format(anamean1))
# In[128]:

# Choose the distribution parameters
k = 10
x_m = 1

# In[129]:

# Generate a sample of size 1000 from the distribution
sampleRange = paretoF(1000)
# Plot the sample histogram and draw the theoretical probability density of
# the random variable on top of it.
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples')
plt.ylabel('number of samples')
plt.xlabel('$x$')
# Theoretical probability density of the random variable
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')

# In[150]:

# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print values
# plt.hist(values.mean(axis=1), normed=True)

# Means of 20 independent Pareto samples of size 1000 each.
# ``range`` replaces the Python-2-only ``xrange``.
m = [np.mean(pareto.rvs(k, size=1000)) for _ in range(20)]
# plt.hist(m, normed=True, alpha=0.5, label='hist samples')
Beispiel #7
0
    # plt.xlabel('Confidence intervals values for the variance')
    # plt.yticks([])
    # plt.legend()
    # plt.show()

    # histogram 3 for Pareto
    k = [2.05, 2.5, 3, 4]
    mean_list = []
    var_list = []
    E_list = []
    V_list = []
    dic = {"k:2.05": [], "k:2.5": [], "k:3": [], "k:4": []}
    for i in k:
        U = np.random.uniform(0.0, 0.1, 10000)
        par1, E, Var = funcPareto.pareto(i, 10000)
        ppf = np.linspace(pareto.ppf(0.01, i), pareto.ppf(0.99, i), 100)
        pdf = pareto.pdf(ppf, i)
        # comparison(par1, ppf-1, pdf, "Pareto comparison for k = {0}".format(i))
        E_list.append(E)
        V_list.append(Var)
        mean_list.append(np.mean(par1))
        var_list.append(np.var(par1))
    dic["k:2.05"] = [E_list[0], V_list[0], mean_list[0], var_list[0]]
    dic["k:2.5"] = [E_list[1], V_list[1], mean_list[1], var_list[1]]
    dic["k:3"] = [E_list[2], V_list[2], mean_list[2], var_list[2]]
    dic["k:4"] = [E_list[3], V_list[3], mean_list[3], var_list[3]]
    df = pd.DataFrame(dic, index=["Mean", "Variance", "Mean_analytical", "Variance_analytical"])
    print(df)


Beispiel #8
0
    def __init__(self, seed, speed, nr_samples, interval):
        """Precompute running mean estimates of Pareto samples and build the figure.

        Draws ``nr_samples`` Pareto samples (shape 3), computes the running
        mean and a 1.96-standard-error band for growing prefixes of the
        sample, and creates three axes: the estimate history, a single bar
        with an error line, and the Pareto pdf. Finally initialises the
        ``TimedAnimation`` base.

        Args:
            seed: Seed for NumPy's global random generator.
            speed: Forwarded to ``TimedAnimation`` as its ``interval``.
            nr_samples: Number of samples to draw; also the x-range of the
                history plot.
            interval: Step between successive prefix sizes (starting at 2).
        """
        np.random.seed(seed)
        b = 3
        # np.random.pareto draws values starting at 0; adding 1 gives values
        # starting at 1, matching scipy's ``pareto(b)`` used below.
        self.samples = (np.random.pareto(b, nr_samples) + 1)
        mean, _var, _skew, _kurt = pareto.stats(b, moments='mvsk')
        self.gt_mean = mean

        # Running estimate and half-width of the 1.96 * standard-error band
        # for each prefix of the sample.
        self.x_values = range(2, nr_samples, interval)
        prefixes = [self.samples[:i] for i in self.x_values]
        self.y_values = np.array([np.mean(s) for s in prefixes])
        self.confidence = np.array(
            [(np.std(s) / math.sqrt(len(s))) * 1.96 for s in prefixes])

        fig = plt.figure(figsize=(10, 10))
        self.ax1 = fig.add_subplot(2, 2, (1, 2))
        self.ax2 = fig.add_subplot(2, 2, 3)
        self.ax3 = fig.add_subplot(2, 2, 4)

        # history plot
        self.ax1.set_title('dancing bar history')
        self.ax1.set_xlabel('iteration')
        self.ax1.set_ylabel('estimated mean')
        self.ax1.set_xlim(0, nr_samples)
        self.ax1.set_ylim(np.min(self.y_values - self.confidence),
                          np.max(self.y_values + self.confidence))

        self.ax1_primitives = []
        # ``closed`` is passed by keyword: newer Matplotlib releases no
        # longer accept it positionally.
        band = Polygon(self._history_polygon_xy(1), closed=True, alpha=0.4, color='blue')
        self.ax1_primitives.append(band)
        self.ax1.add_patch(band)

        history_line = Line2D([], [], color='blue')
        self.ax1_primitives.append(history_line)
        self.ax1.add_line(history_line)

        # Reference line at the mean reported by scipy.
        self.ax1.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)

        # bar plot
        self.ax2.set_title('dancing bar')
        self.ax2.set_ylabel('avg sales')
        self.ax2.set_xlim(-0.5, 1)
        self.ax2.set_xticks([0.25])
        self.ax2.set_xticklabels(['department XYZ'])
        self.ax2.set_ylim(0, np.max(self.y_values + self.confidence))

        self.ax2_primitives = []
        bar = Rectangle((0, 0), 0.5, self.y_values[1], alpha=0.4, color='blue')
        self.ax2_primitives.append(bar)
        self.ax2.add_patch(bar)

        self.ax2.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)

        # Vertical error line centred on the bar.
        error_line = Line2D(
            [0.25, 0.25],
            [
                self.y_values[1] - self.confidence[1],
                self.y_values[1] + self.confidence[1],
            ],
            color='black')
        self.ax2_primitives.append(error_line)
        self.ax2.add_line(error_line)

        # pdf plot
        self.ax3.set_title('pareto pdf')
        x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
        # Plot the density itself. The previous ``pdf + 1`` offset the curve
        # vertically, which is not a density; the +1 shift of the samples
        # acts on x and is already part of scipy's ``pareto``.
        self.ax3.plot(x, pareto.pdf(x, b), 'blue', lw=1, alpha=0.6)

        animation.TimedAnimation.__init__(self,
                                          fig,
                                          interval=speed,
                                          blit=True,
                                          repeat=False)
Beispiel #9
0
import numpy as np
from scipy.stats import pareto
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

b = 2.62
mean, var, skew, kurt = pareto.stats(b, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
ax.plot(x, pareto.pdf(x, b), 'r-', lw=5, alpha=0.6, label='pareto pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = pareto(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``: the cdf of the ppf values must give
# back the original probabilities. The result used to be computed and
# discarded; asserting it makes the check effective when run as a script.

probs = [0.001, 0.5, 0.999]
vals = pareto.ppf(probs, b)
assert np.allclose(probs, pareto.cdf(vals, b)), "pareto cdf/ppf round trip failed"

# Generate random numbers:
# get_ipython().magic(u'matplotlib inline')


# In[43]:

# Choose the distribution parameters
k = 10
# Generate a sample of size 1000 from the distribution
sampleRange = pareto.rvs(k, size=1000)
# Plot the sample histogram and draw the theoretical probability density of
# the random variable on top of it.
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples pareto')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# Theoretical probability density of the random variable
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')


# In[57]:

# функция построения гистограммы распределений выборочных средних 
# и плотности соответствующего нормального распределения 
# sizeSamples - выбороки объёма n
def paretoF(sizeSamples, Ex, Dx):
    n = sizeSamples
    #генерация выборок
    values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])