예제 #1
0
	def TV(base_oracle,weights_0,weights_1,centers_0,centers_1):
		"""Monte Carlo estimate of the TV distance between two superpositions of Airy disks.

		Samples are drawn in batches from ``base_oracle`` (a mixture of augmented
		Paretos centred on both sets of centers) and used as an importance-sampling
		proposal for the L1 distance between the two densities.

		Parameters:
			base_oracle: zero-argument callable returning one batch of samples as a
				2-column array (column 0 is x, column 1 is y).
			weights_0, weights_1: mixing weights of the two superpositions.
			centers_0, centers_1: x-coordinates of the component centers (all
				centers lie on the x-axis); indexed 0..len(weights)-1.

		Returns:
			The running average of the per-batch L1 estimates, halved (TV = L1/2).
			Returns 0 when TOTAL_SAMPS // BATCH_SIZE is 0.

		Relies on the enclosing scope for TOTAL_SAMPS, BATCH_SIZE, airy, pareto, np.
		"""
		# running counter for which batch we are processing
		counter = 0
		# running estimate for the TV distance
		result = 0
		# iterate over the batches; floor division keeps the count an int, since
		# range() raises TypeError on the float that `/` yields under Python 3
		for _ in range(TOTAL_SAMPS // BATCH_SIZE):
			# draw batch of samples from mixture of pareto's of size BATCH_SIZE
			points = base_oracle()
			# BATCH_SIZE x total_comps arrays consisting of distances from the pareto
			# mixture samples to all centers of the two superpositions of Airy disks
			dist_to_centers_0 = np.zeros((len(points),len(weights_0)))
			dist_to_centers_1 = np.zeros((len(points),len(weights_1)))
			# squared distance of the pareto mixture samples from the x-axis
			ysquared_diffs = points[:,1]**2
			# compute the entries of dist_to_centers_0
			for i in range(len(weights_0)):
				xdiff_to_center_i_0 = points[:,0] - centers_0[i]
				dist_to_centers_0[:,i] = np.sqrt(xdiff_to_center_i_0**2 + ysquared_diffs)
			# compute the entries of dist_to_centers_1
			for i in range(len(weights_1)):
				xdiff_to_center_i_1 = points[:,0] - centers_1[i]
				dist_to_centers_1[:,i] = np.sqrt(xdiff_to_center_i_1**2 + ysquared_diffs)

			# Boolean matrix indicating which points fall within the "augmented" part of the
			# augmented Pareto around each of the centers
			unit_interval_points = np.hstack(((dist_to_centers_0 < 1.), (dist_to_centers_1 < 1.)))
			# pareto density (with parameter 2/3) evaluated at each sample with respect to each center
			# NOTE: 2*pi comes from polar coordinates
			pareto_densities_0 = pareto.pdf(dist_to_centers_0, 2/3.) / dist_to_centers_0 * weights_0 / (2 * np.pi)
			pareto_densities_1 = pareto.pdf(dist_to_centers_1, 2/3.) / dist_to_centers_1 * weights_1 / (2 * np.pi)
			pareto_densities = np.hstack((pareto_densities_0,pareto_densities_1))

			# density of "augmented" part of the augmented Pareto evaluated at each sample with respect to each center
			unit_interval_densities_0 = 1./dist_to_centers_0 * weights_0 / (2 * np.pi)
			unit_interval_densities_1 = 1./dist_to_centers_1 * weights_1 / (2 * np.pi)
			unit_interval_densities = np.hstack((unit_interval_densities_0,unit_interval_densities_1))

			# density of the mixture of augmented paretos at each of the points
			# NOTE: factor of /2 at the end is because proposal_densities is over both sets of centers
			proposal_densities = np.sum(pareto_densities * (1 - unit_interval_points)/2. + unit_interval_densities * unit_interval_points/2.,axis=1)/2

			# densities of the two superpositions of airy disks at each of the points
			D0_densities = np.sum(airy(dist_to_centers_0) * weights_0, axis=1)
			D1_densities = np.sum(airy(dist_to_centers_1) * weights_1, axis=1)

			# absolute difference in Radon-Nikodym derivatives between the two superpositions relative to the mixture of augmented paretos
			# NOTE: normalization constant pi comes from fact that integral of J_1(sqrt(x^2+y^2))/(x^2+y^2) over R^2 is pi
			ratio_diffs = np.abs((D1_densities - D0_densities)/proposal_densities) / np.pi

			# average absolute difference across this batch
			new_av = np.average(ratio_diffs)

			# update running average
			result = (result * counter + new_av)/(counter + 1.)
			counter += 1

		# output TV estimate (note TV = L1/2, hence factor of 2)
		return result/2.
예제 #2
0
def test_plot_1():
    """Draw the Pareto PDF for four shape parameters at scale 2 and return the figure."""
    scale = 2
    shapes = [1, 2, 3, 4]

    with sciplot.style(locale_setting='en_US.UTF-8'):
        grid = np.linspace(0, 6, 1000)
        # one row of densities per shape parameter
        curves = np.array([pareto.pdf(grid, scale=scale, b=shape) for shape in shapes])

        sciplot.set_size_cm(7)
        fig, ax = plt.subplots(1, 1)

        title = (
            r'Pareto PDF'
            r' $p(x \,|\, x_\mathrm{m}, \alpha) = \frac{\alpha x_\mathrm{m}^\alpha}{x^{\alpha+1}}$'
            r' with $x_\mathrm{m}=2$'
        )
        fig.suptitle(title)

        # transpose so that each column (one per shape) becomes its own line
        lines = ax.plot(grid, curves.T)
        legend_labels = tuple(r'$\alpha=' + str(shape) + '$' for shape in shapes)

        sciplot.set_legend(ax=ax,
                           plot_tpl=lines,
                           label_tpl=legend_labels,
                           loc='upper right')

        ax.set_xlabel('$x$')
        ax.set_ylabel(r'$p(x \,|\, x_\mathrm{m}, \alpha)$')

        return fig
예제 #3
0
def real_pareto(distance_list, nodes):
    """Plot the Pareto(b=1, scale=1) density on an even grid over [0, 5].

    The grid has one point per entry of *distance_list*; only its length is
    used. *nodes* is forwarded unchanged to ``plot_graph``.
    """
    shape, scale = 1, 1
    grid = np.linspace(start=0, stop=5, num=len(distance_list))
    densities = np.asarray(pareto.pdf(x=grid, b=shape, loc=0, scale=scale))
    plot_graph(densities,
               nodes,
               'Actual Pareto (auto bin size)',
               file_name="ParetoDistn.png")
예제 #4
0
def paretoplt(alphas):
    """Plot the Pareto PDF (scale 1) on [0, 5] for every shape parameter in *alphas*.

    One curve is drawn per element of *alphas*; the figure is shown with
    ``plt.show()`` and nothing is returned.
    """
    import numpy as np
    from matplotlib import pyplot as plt
    from scipy.stats import pareto

    xm = 1  # scale
    x = np.linspace(0, 5, 1000)
    # The shape goes first, positionally: `pdf(x, scale=xm, a)` is a SyntaxError
    # because a positional argument may not follow a keyword argument.
    output = np.array([pareto.pdf(x, a, scale=xm) for a in alphas])
    # transpose so each shape parameter becomes its own line
    plt.plot(x, output.T)
    plt.show()
예제 #5
0
def model(param):
    """Evaluate the model at a proposed ``(Enu, alpha)`` pair.

    Re-weights the module-level ``Enu`` samples with a Pareto density of shape
    ``alpha``, rebuilds the 1D KDE over ``Enu_log`` with those weights, and
    returns ``prefactor(E_reconstruted, Enu) * KDE(log10(Enu))``.
    """
    Enu_proposed, alpha_proposed = param

    # regenerate f(Enu|alpha) at the proposed alpha
    sample_weights = pareto.pdf(Enu, alpha_proposed) / weights
    density_1d = kde.KDE1D(
        Enu_log,
        weights=sample_weights,
        bw_method=0.04,
        adaptive=True,
        weight_adaptive_bw=True,
        alpha=0.3,
    )

    conditional = prefactor(E_reconstruted, Enu_proposed)
    marginal = density_1d.evaluate(np.log10(Enu_proposed))
    return conditional * marginal
예제 #6
0
import numpy as np
from scipy.stats import pareto
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from pathlib import Path

import sciplot

# Plot 1
with sciplot.style(theme='no-latex', locale_setting='en_US.UTF-8'):
    x_m = 2  # scale
    alpha_lst = [1, 2, 3, 4]  # shape parameters
    x = np.linspace(0, 6, 1000)

    pdf = np.array([pareto.pdf(x, scale=x_m, b=a) for a in alpha_lst])

    sciplot.set_size_cm(7)
    fig, ax = plt.subplots(1, 1)

    fig.suptitle(
        r'Pareto PDF' +
        r' $p(x \,|\, x_\mathrm{m}, \alpha) = \frac{\alpha x_\mathrm{m}^\alpha}{x^{\alpha+1}}$'
        + r' with $x_\mathrm{m}=2$')

    line_plot = ax.plot(x, pdf.T)

    label_lst = []
    for alpha in alpha_lst:
        label_lst.append(r'$\alpha=' + str(alpha) + '$')

    sciplot.set_legend(ax=ax,
예제 #7
0
#!/usr/bin/env python

# Plots Pareto distribution

from scipy.stats import pareto
import numpy as np
import matplotlib.pylab as pl

# (scale m, shape k) pairs, each drawn with its own line style
params = [(1, 3), (1, 2), (1, 1), (0.001, 1)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(*pair) for pair in params]

# shared evaluation grid on [0, 2)
grid = np.arange(0, 2, .01)
for (m, k), style, label in zip(params, styles, labels):
    probabilities = pareto.pdf(grid, k, scale=m)
    pl.plot(grid, probabilities, style, label=label)

pl.axis([0, 2, 0, 3])
pl.title('Pareto Distribution')
pl.legend()
pl.savefig('paretoPlot.png')
pl.show()
예제 #8
0
    def pareto_likelihood(node, data=None, dtype=np.float64):
        """Return the Pareto density of the node's column(s) of *data* as an (n, 1) array.

        Uses ``node.scope`` to select columns of *data* and ``node.a`` as the
        Pareto shape parameter; the result is stored with the requested *dtype*.
        """
        from scipy.stats import pareto

        densities = pareto.pdf(data[:, node.scope], node.a)
        n_rows = data.shape[0]
        probs = np.ones((n_rows, 1), dtype=dtype)
        # in-place assignment keeps the (n, 1) shape and dtype of the buffer
        probs[:] = densities
        return probs
예제 #9
0
파일: base.py 프로젝트: stippingerm/mypylib
 def _pdf(self, x, b, m):
     return pareto.pdf(x, b) / pareto.cdf(m, b)
예제 #10
0
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml

from scipy.stats import pareto

# (scale m, shape k) pairs, each drawn with its own line style
params = [(0.1, 1), (0.1, 2), (0.2, 1), (0.2, 2)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(*pair) for pair in params]
x = np.linspace(0, 1, 1000)

# Figure 1: densities on linear axes
for (m, k), style, label in zip(params, styles, labels):
    probabilities = pareto.pdf(x, k, scale=m)
    plt.plot(x, probabilities, style, label=label)

plt.title('Pareto Distribution')
plt.legend()
plt.axis((0.0, 0.5, 0, 20))
pml.savefig('paretoPdf.pdf')
plt.show()

# Figure 2: the same densities on log-log axes
for (m, k), style, label in zip(params, styles, labels):
    probabilities = pareto.pdf(x, k, scale=m)
    plt.loglog(x, probabilities, style, label=label)

plt.xlim(0.05, 1)
plt.title('Log Pareto Distribution')
예제 #11
0
mpl.rcParams['axes.color_cycle'] = ['k', 'k', 'k', 'k', 'k', 'k']

import matplotlib.pyplot as plt
f, ax = plt.subplots(2, 2, figsize=(2 * 0.8 * 4, 2 * 0.8 * 3))

data = loadtxt(
    "/home/marius/Dokumenter/fys4150/batmobile/resources/data_2.000000_111.txt"
)
data = data.reshape(data.shape[0] * data.shape[1])
ax[0][0].hist(data,
              bins=linspace(0, 100, 1000),
              normed=True,
              facecolor="white")
m = linspace(0, 10, 1000)
ax[0][0].plot(m,
              pareto.pdf(m, 3.5, loc=-1),
              'k--',
              label=ur"$a = 3.5$",
              linewidth=1.5)
ax[0][0].set_xlim(0, 3)
ax[0][0].legend(fontsize=11)

data = loadtxt(
    "/home/marius/Dokumenter/fys4150/batmobile/resources/data_1.500000_351.txt"
)
data = data.reshape(data.shape[0] * data.shape[1])
ax[0][1].hist(data, bins=linspace(0, 100, 800), normed=True, facecolor="white")
m = linspace(0, 10, 1000)
ax[0][1].plot(m,
              pareto.pdf(m, 2.5, loc=-1),
              'k--',
# In[43]:

# Choose the distribution parameter (Pareto shape)
k = 10
# Draw a sample of size 1000 from it
sampleRange = pareto.rvs(k, size=1000)
# Plot the sample histogram and overlay the theoretical density of the random variable.
# `density=True` replaces the `normed` keyword, which current Matplotlib no longer accepts.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples pareto')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# theoretical probability density, evaluated between the 1% and 99% quantiles
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')


# In[57]:

# функция построения гистограммы распределений выборочных средних 
# и плотности соответствующего нормального распределения 
# sizeSamples - выбороки объёма n
def paretoF(sizeSamples, Ex, Dx):
    n = sizeSamples
    #генерация выборок
    values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])
    #вычисление выборочных средних
    meanVal = values.mean(axis = 1)
    plt.hist(meanVal, normed=True, alpha=0.5, label='hist mean n ' + str(n))
# +
# Fit a tail model to the observations of `y` that fall below the p_bar-quantile.
p_bar = 0.1  # probability threshold
# Probability grid: step 1e-4 up to p_bar, then step 1e-3 up to 1, as a single row
p_quant = r_[arange(10**-4, p_bar + 10**-4, 10**-4),
             arange(p_bar + 0.001, 1.001, 0.001)].reshape(
                 1, -1)  # quantile probability levels
q_HFP = HFPquantile(y, p_quant, p)
# NOTE(review): exact float equality against an arange-generated grid; this
# selects nothing if no grid point is bit-identical to p_bar - confirm.
y_bar = q_HFP[p_quant == p_bar]  # threshold
# data below the threshold
l_1 = where(y[0] < y_bar)[0]
l_2 = where(p_quant[0] <= p_bar)[0]
y_ex = y_bar - y[[0], l_1]  # dataset of the conditional excess distribution

csi_MLFP, sigma_MLFP = FitGenParetoMLFP(
    y_ex, p[0, l_1]
)  # Maximum Likelihood optimization with Generalized Pareto Distribution
# NOTE(review): if `pareto` is scipy.stats.pareto, the positional order is
# (x, b, loc, scale), so sigma_MLFP is used as loc and 0 as scale here. A
# generalized-Pareto fit presumably wants genpareto.pdf(x, csi, loc=0,
# scale=sigma) - confirm against the imports and the intended model.
f_MLFP = pareto.pdf(sort(y_ex), csi_MLFP, sigma_MLFP, 0)  # estimated pdf

q_MLFP, *_ = QuantileGenParetoMLFP(y_bar, p_bar, csi_MLFP, sigma_MLFP,
                                   p_quant[0, l_2])  # MLFP-quantile

q_bt = q_HFP[0, l_2]  # historical quantile below the threshold
# -

# ## Generate figures showing the unconditional MLFP-mean and standard deviation and the estimated unconditional quantile function

# +
typ = namedtuple('type', 'Entropy')
typ.Entropy = 'Exp'
ens = EffectiveScenarios(p, typ)
option = namedtuple('option', 'n_bins')
예제 #14
0
        unique, counts = np.unique(data, return_counts=True)
        if n > 10:
            max_value = np.amax(counts)
            print(max_value)
            plt.plot(unique,
                     counts * 5 / max_value,
                     label="Pareto EPMF{h}".format(h=i + 1))
        else:
            plt.plot(unique, counts, label="Pareto EPMF{h}".format(h=i + 1))
    return data


[a, xm] = [5, 1]
ddata = poligon_par(5)
plt.plot(np.unique(ddata),
         pareto.pdf(np.unique(ddata), a, scale=xm),
         color="black",
         lw=4,
         label="Pareto PMF")
plt.legend(loc='best', frameon=False)
plt.show()
ddata = poligon_par(10)
plt.plot(np.unique(ddata),
         pareto.pdf(np.unique(ddata), a, scale=xm),
         color="black",
         lw=4,
         label="Pareto PMF")
plt.legend(loc='best', frameon=False)
plt.show()
ddata = poligon_par(100)
plt.plot(np.unique(ddata),
예제 #15
0
파일: homework_2.py 프로젝트: Andreeaxhx/ML
pylab.ylim(-50, 50)
pylab.xlim(-5, 5)
pylab.xlabel('$x$')
pylab.legend()
pylab.show()

#Pareto

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pareto
x_m = 1  # scale
alpha = [1, 2]  # list of values of shape parameters
samples = np.linspace(start=0, stop=5, num=1000)  # evaluation grid (the "sample")
for a in alpha:
    # wrapped in a 1-row array and transposed so plot() receives a column
    output = np.array([pareto.pdf(x=samples, b=a, loc=0, scale=x_m)])
    plt.plot(samples, output.T, label='alpha {0}'.format(a))
plt.xlabel('samples', fontsize=15)
plt.ylabel('PDF', fontsize=15)
plt.title('Probability Density function', fontsize=15)
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible` and later dropped, while the positional form works in every release.
plt.grid(True, color='grey', alpha=0.3, linestyle='-.', linewidth=2)
# NOTE(review): rcParams set here should only affect figures created afterwards,
# not the one already drawn above - confirm whether this was meant to come first.
plt.rcParams["figure.figsize"] = [5, 5]
plt.legend(loc='best')
plt.show()

#Exercise 3 -- Cuberschi Lucian
#(Random variables, implementation)

#Suppose you measure the temperature 10 consecutive days with a thermometer that has a small random error.

# 1.What is the mean temperature, knowing that the mean error is +1°C and the measurements are those in the variable Y below.
예제 #16
0
step = (ma - mi) / sqrt(n)
beg = round(mi, 3)
arr_x.append(beg)
for i in range(round(sqrt(n))):
    beg += step
    arr_x.append(round(beg, 3))
arr_y = list()
for j in range(round(sqrt(n))):
    count = 0
    for element in data:
        if arr_x[j] <= element and element <= arr_x[j + 1]:
            count += 1
    arr_y.append(count)
arr_x.pop()
maximum = max(arr_y)
arrr_y = list()
for each in arr_y:
    el = each * a / maximum
    arrr_y.append(el)
x = np.linspace(0, 5, 1000)
par_arr = np.array(pareto.pdf(x, scale=xm, b=a))
fig = plt.figure()
ax = fig.add_subplot()
plt.hist(arr_x,
         weights=arrr_y,
         bins=round(sqrt(n)),
         label="Pareto pdf simulation")
plt.plot(x, par_arr, color="r", label="Pareto pdf")
plt.legend(loc='best', frameon=False)
plt.show()
예제 #17
0
from scipy.stats import pareto
# Pareto density at x=6 with shape b=3, loc=0, scale=3
print(pareto.pdf(6, b=3, loc=0, scale=3))
예제 #18
0
def expected(x, i = -1):
    """Return the Pareto density with shape 1 at *x*; *i* is accepted but unused."""
    shape = 1
    return pareto.pdf(x, shape)
from scipy.stats import norm
from scipy.stats import pareto


def multiply_plot(pdf1, title1, pdf2, title2):
    """Plot two densities and their pointwise product on a 2x2 grid.

    The two inputs fill the top row; their product spans the bottom row. All
    tick marks are removed. Returns ``(prod_pdf, prod_name)`` so the product
    can be fed into a further call.
    """
    def prod_pdf(z):
        return pdf1(z) * pdf2(z)

    prod_name = title1 + ' x ' + title2
    x = np.linspace(0, 10, 100)
    plt.figure(figsize=(5, 7))

    # (grid position, extra subplot2grid options, density, title) per panel
    panels = [
        ((0, 0), {}, pdf1, title1),
        ((0, 1), {}, pdf2, title2),
        ((1, 0), {'colspan': 2}, prod_pdf, 'Success = ' + prod_name),
    ]
    axes = []
    for position, options, density, title in panels:
        ax = plt.subplot2grid((2, 2), position, **options)
        ax.plot(x, density(x))
        ax.set_title(title)
        axes.append(ax)

    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])
    return prod_pdf, prod_name


def norm_pdf(x):
    """Normal density with loc 5 and scale 2."""
    return norm.pdf(x, 5, 2)


def pareto_pdf(x):
    """Pareto density with shape 5 and loc 2."""
    return pareto.pdf(x, 5, 2)


# Start from Talent x Effort, then fold each further factor into the running product.
prod_pdf, prod_name = multiply_plot(norm_pdf, 'Talent', norm_pdf, 'Effort')
for factor_pdf, factor_name in ((norm_pdf, 'Beauty'), (pareto_pdf, 'IG Followers')):
    prod_pdf, prod_name = multiply_plot(prod_pdf, prod_name, factor_pdf,
                                        factor_name)
예제 #20
0
from scipy.stats import pareto
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

b = 2.62
mean, var, skew, kurt = pareto.stats(b, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
ax.plot(x, pareto.pdf(x, b), 'r-', lw=5, alpha=0.6, label='pareto pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = pareto(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = pareto.ppf([0.001, 0.5, 0.999], b)
np.allclose([0.001, 0.5, 0.999], pareto.cdf(vals, b))
# True

# Generate random numbers:
예제 #21
0
    # plt.xlabel('Confidence intervals values for the variance')
    # plt.yticks([])
    # plt.legend()
    # plt.show()

    # histogram 3 for Pareto
    k = [2.05, 2.5, 3, 4]
    mean_list = []
    var_list = []
    E_list = []
    V_list = []
    dic = {"k:2.05": [], "k:2.5": [], "k:3": [], "k:4": []}
    for i in k:
        U = np.random.uniform(0.0, 0.1, 10000)
        par1, E, Var = funcPareto.pareto(i, 10000)
        ppf = np.linspace(pareto.ppf(0.01, i), pareto.ppf(0.99, i), 100)
        pdf = pareto.pdf(ppf, i)
        # comparison(par1, ppf-1, pdf, "Pareto comparison for k = {0}".format(i))
        E_list.append(E)
        V_list.append(Var)
        mean_list.append(np.mean(par1))
        var_list.append(np.var(par1))
    dic["k:2.05"] = [E_list[0], V_list[0], mean_list[0], var_list[0]]
    dic["k:2.5"] = [E_list[1], V_list[1], mean_list[1], var_list[1]]
    dic["k:3"] = [E_list[2], V_list[2], mean_list[2], var_list[2]]
    dic["k:4"] = [E_list[3], V_list[3], mean_list[3], var_list[3]]
    df = pd.DataFrame(dic, index=["Mean", "Variance", "Mean_analytical", "Variance_analytical"])
    print(df)


예제 #22
0
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pareto

# Pareto shape `a` and scale `xm`; sample sizes to simulate
[a, xm] = [5, 1]
N = [5, 10, 100, 1000, 10**5]

for n in N:
    # A fresh figure per sample size: plt.show() finishes the current figure, so
    # reusing one Axes would draw every later histogram on a figure that is
    # never displayed again.
    fig = plt.figure()
    ax = fig.add_subplot()
    for i in range(5):
        # Inverse-transform sampling, vectorised: xm / U**(1/a) with U ~ Uniform[0, 1)
        data = xm / np.random.rand(n) ** (1 / a)
        data = np.around(data, decimals=2)
        unique, counts = np.unique(data, return_counts=True)
        ax.hist(unique, bins=int(len(unique)), weights=counts, label="Pareto EHMF{h}".format(h=i + 1), alpha=0.7, density=True)
    # theoretical density over the support of the last simulated sample
    support = np.unique(data)
    ax.plot(support, pareto.pdf(support, a, scale=xm), '-.', color="black", lw=1, label="Pareto PMF")
    ax.legend(loc='best')
    plt.show()
예제 #23
0
k = 10
x_m = 1

# In[129]:

# Draw a sample of size 1000 from the distribution
sampleRange = paretoF(1000)
# Plot the sample histogram and overlay the theoretical density of the random variable.
# `density=True` replaces the `normed` keyword, which current Matplotlib no longer accepts.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples')
plt.ylabel('number of samples')
plt.xlabel('$x$')
# theoretical probability density, evaluated between the 1% and 99% quantiles
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')

# In[150]:

# Means of 20 independent samples of size 1000.
# `range` instead of `xrange`, which does not exist on Python 3.
m = [np.mean(pareto.rvs(k, size=1000)) for _ in range(20)]

# theoretical mean of the distribution
mean = pareto.mean(k)
EX = mean
예제 #24
0
from scipy.stats import pareto
import matplotlib.pyplot as plt

# Prints 33 (burst length, idle time) pairs derived from a Pareto distribution
# with shape 0.5.
burst_time = 500
idle_time = 500
rate = 200
packetSize = 210
shape = 0.5

# time per packet; the initial burstlen below is overwritten inside the loop
interval = (packetSize * 8) / rate
burstlen = burst_time / interval




# NOTE(review): scipy's Pareto density is zero for x < 1, and every argument
# passed below (0, -52, -104, ... and 0, -22, -44, ...) is <= 0. So burstlen is
# always forced to 1 and idle_time is always 0.0. Sampling (pareto.rvs /
# pareto.ppf) was presumably intended instead of pdf - confirm.
for i in range(33):
  ##  b1 = (burstlen * (shape - 1)) / shape
  ##  b2 = (idle_time * (shape - 1)) / shape
  ##  print('B1: {} ||| B2: {}'.format(b1, b2))

    ##next_burtlen
    # round to nearest int, with a floor of 1
    burstlen = int(pareto.pdf(i*-52, shape) + 0.5)
    if (burstlen == 0):
        burstlen = 1
    print('BurstLen {}: {}'.format(i, burstlen))

    ##next_idle_time
    idle_time= pareto.pdf(i*-22, shape)
    print('IdleTime {}: {}'.format(i, idle_time))
예제 #25
0
print(data)

fig, ax = plt.subplots()

# Plot the histogram
bins = list(range(0, max(data), 100))  # bins should be every 100ms
bins.append(max(data))  # also include the last one

print(bins)
plt.hist(data, bins=bins, density=True, facecolor='green', alpha=1)

# Try to fit a Pareto in the data
# shape is b (alpha in wikipedia), scale is x (x_b in wikipedia)
shape, loc, scale = pareto.fit(data)
y = pareto.pdf(bins, shape, loc=loc, scale=scale)

# Plot the Pareto on top of the bins
l = plt.plot(bins, y, 'r--', linewidth=2)

plt.xticks((0, 500, 1000, 1500) + tuple(range(2500, max(data), 5000)),
           rotation=90)
ax.grid(alpha=0.3)

#plot
plt.xlabel('Miliseconds')
plt.ylabel('Probability')
plt.title(
    "Histogram of %d circuit timeout values fitted against Pareto with shape=%.3f, loc=%.3f and scale=%.3f"
    % (len(data), shape, loc, scale))
plt.grid(True)
예제 #26
0
#PARTE C OUTRAS DISTRIBUIÇÕES

N = 500
xn = np.linspace(0, 50, N)    # grid for the PDF
xc = np.linspace(0, 100, 100)  # grid for the CDF

# Pareto

alpha = [1.16]
loc = 0
scale = 1

for i in alpha:

    pdf = pareto.pdf(xn, i, loc)

    plt.figure('Pareto PDF')
    plt.title('Pareto PDF')
    # x and y are passed by keyword: current seaborn accepts them only that way
    ax = sns.lineplot(x=xn, y=pdf, color='k')
    ax.fill_between(xn, pdf, color='olivedrab', alpha=0.2)

cdf = pareto.cdf(xc, alpha[0], loc, scale)

plt.figure('Pareto CDF')
plt.title('Pareto CDF')
ax = sns.lineplot(x=xc, y=cdf, color='red')
ax.fill_between(xc, cdf, color="firebrick", alpha=0.3)

#Gamma (com a = é uma exponencial)
a = [1, 3, 5]
예제 #27
0
# create a log data set
# keep only events with a positive reconstructed energy so log10 is defined
idx = np.where(Erec > 0.0)[0]
Enu_log = np.log10(np.take(Enu, idx))
Erec_log = np.log10(np.take(Erec, idx))

# trim the non-log data down to size
weights = np.take(weights, idx)
Enu = np.take(Enu, idx)

'''
Construct the function
f(Erec|Enu) = f(Erec, Enu|alpha) / f(Enu|alpha)
'''
# weights don't actually matter for this calculation
# (the same `wt` feeds both the numerator and the denominator KDE below)
a = 1.0
wt = pareto.pdf(Enu, a) / weights

# 1D KDE to compute the denominator f(Enu|alpha)
kernel1D_log = kde.KDE1D(Enu_log, weights=wt, bw_method=0.04, adaptive=True, weight_adaptive_bw=True, alpha=0.3)

# 2D KDE to compute the numerator f(Erec, Enu|alpha)
# rows are (log10 Erec, log10 Enu); one column per event
points = np.vstack([Erec_log, Enu_log])

# NOTE(review): `adaptive`, `weight_adaptive_bw` and `alpha` are not parameters of
# scipy.stats.gaussian_kde, so this is presumably a project-local class - confirm.
kernel2D_log = gaussian_kde(points, weights=wt, bw_method=0.06,
                            adaptive=True, weight_adaptive_bw=True, alpha=0.3)


# funtion to return the entire prefactor term f(Erec|Enu) evaluated at E_reconstructed and Enu_proposed
def prefactor(E_reconstruted, Enu_proposed):
    """Return the 2D KDE at (log10 Erec, log10 Enu) divided by the 1D KDE at log10 Enu."""
    log_point = np.log10(np.vstack([E_reconstruted, Enu_proposed]))
    joint = kernel2D_log.evaluate(log_point, adaptive=True)
    marginal = kernel1D_log(np.log10(Enu_proposed))
    return joint / marginal
예제 #28
0
n = 10000
beta = 1
k1 = 2.05; k2 = 2.5; k3 = 3; k4 = 4

# First k: simulate, then compare the histogram with the scipy density
res31, mean1, var1 = paretobay(beta,k1,n)
# sample mean / variance of the simulated values
anamean1 = np.mean(res31)
anavar1 = np.var(res31)

x1 = np.linspace(pareto.ppf(0.01, k1),pareto.ppf(0.9999,k1),100)

plt.figure()
plt.hist(res31,align='mid',color='tan',edgecolor='moccasin',bins=20,density=True,stacked=True)
# remember the histogram's axis limits so the overlay does not rescale the y-axis
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
# the density curve is drawn shifted left by 1 (x1-1)
plt.plot(x1-1, pareto.pdf(x1, k1),'g-', lw=2,alpha=0.6)
plt.ylim(ymin,ymax)
plt.title("Pareto Distributed Histogram (k=2.05)")
plt.xlabel("Classes")
plt.ylabel("Density")
# call show(): the bare attribute reference `plt.show` displayed nothing
plt.show()
print('----Pareto with K = 2.05----')
print('The theoretical mean is: {0}'.format(mean1))
print('The theoretical variance is: {0}'.format(var1))
print('The analytical mean is: {0}'.format(anamean1))
print('The analytical variance is: {0}'.format(anavar1))

#Second k
res32, mean2, var2 = paretobay(beta,k2,n)
anamean2 = np.mean(res32)
anavar2 = np.var(res32)
예제 #29
0
파일: paretoPlot.py 프로젝트: 9578577/pmtk3
#!/usr/bin/env python

# Plots Pareto distribution

import matplotlib.pyplot as pl
import numpy as np
from scipy.stats import pareto

# (scale m, shape k) pairs, each drawn with its own line style
params = [(1, 3), (1, 2), (1, 1), (0.001, 1)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(*pair) for pair in params]

# shared evaluation grid on [0, 2)
grid = np.arange(0, 2, .01)
for (m, k), style, label in zip(params, styles, labels):
  probabilities = pareto.pdf(grid, k, scale=m)
  pl.plot(grid, probabilities, style, label=label)

pl.axis([0, 2, 0, 3])
pl.title('Pareto Distribution')
pl.legend()
pl.savefig('paretoPlot.png')
pl.show()
예제 #30
0
def make_data_pen(uu_label, idx):
    """Build an imbalanced train/test split of PENDIGITS and save it as .npy files.

    Reads ``PENDIGITS.csv`` (column 0 is the label, the rest are features),
    relabels every class in *uu_label* as 99, splits 80/20, drops
    ``uu_label_test`` from the training part, then sub-samples each remaining
    class by a shuffled Pareto-derived fraction via ``make_imbalance``.

    Parameters:
        uu_label: iterable of class labels to treat as unseen (mapped to 99).
        idx: index used only in the output file names.

    Writes ``pendigits/{len(uu_label)}-{idx} pendigits_train.npy`` and
    ``..._test.npy``. Relies on the module-level names ``ratio`` and
    ``uu_label_test``.
    """
    # Generate PENDIGITS-LT
    input_data = pd.read_csv('PENDIGITS.csv').to_numpy()
    x = input_data[..., 1:]
    y = input_data[..., 0]
    print(y.shape)

    total = 10
    unseen = len(uu_label)
    all_num = total - unseen
    # one evaluation point per seen class, spread over [1, sqrt(ratio)]
    wt = np.linspace(1, ratio**0.5, all_num)

    y = y.astype(int)
    for uu in uu_label:
        y = np.where(y == uu, 99, y)

    old_x_train, x_test, old_y_train, y_test = train_test_split(x,
                                                                y,
                                                                test_size=0.2)
    x_train, y_train = old_x_train[old_y_train != uu_label_test], old_y_train[
        old_y_train != uu_label_test]

    c = Counter(y_train)
    z = [i for i in range(0, 10) if i not in uu_label]

    # keep-fractions per class: Pareto(b=1) density at wt, in random order
    size = list(pareto.pdf(wt, b=1))
    random.shuffle(size)

    strategy = {i: floor(c[i] * size.pop()) for i in z}
    y_train = y_train.astype("int")

    x_res, y_res = make_imbalance(x_train, y_train, sampling_strategy=strategy)

    # shuffle the test set, keeping features and labels aligned
    zipped = list(zip(x_test, y_test))
    random.shuffle(zipped)
    x_test, y_test = zip(*zipped)
    x_test = np.array(x_test)
    y_test = np.array(y_test)

    train_list = []
    test_list = []

    batch = 1
    for i in range(batch):
        X_train = x_res
        Y_train = y_res
        X_test = x_test
        Y_test = y_test
        Y_train = Y_train.reshape(-1, 1)
        Y_test = Y_test.reshape(-1, 1)
        # label becomes the last column
        train = np.hstack((X_train, Y_train))
        test = np.hstack((X_test, Y_test))

        train_list.append(train)
        test_list.append(test)
    # Convert once, after the loop: converting inside it replaced the lists with
    # ndarrays, so `.append` would fail on a second batch.
    train_list = np.array(train_list)
    test_list = np.array(test_list)
    np.save("pendigits/{}-{} pendigits_train.npy".format(len(uu_label), idx),
            train_list)
    np.save("pendigits/{}-{} pendigits_test.npy".format(len(uu_label), idx),
            test_list)
def main():
    """Re-weight a road-graph edge list with Pareto densities and check triangles.

    Reads ``road-chesapeake.mtx`` from the current directory (first line is the
    node count, remaining lines are ``u v ...`` edges), renumbers nodes from 0
    in order of first appearance, assigns each edge a Pareto(b=1) density taken
    from an even grid over [0, 5] (0.002 where the density is 0), writes the
    result to ``graph_euro_road.txt`` and asserts the triangle inequality on
    every node triple whose three edges are all present.
    """
    # other inputs used previously: "road-minnesota.mtx", "road-euroroad.edges"
    file_name = "road-chesapeake.mtx"
    with open(os.path.join(os.getcwd(), file_name), "r") as f:
        cpp_graph = f.readlines()
    g = {}
    print("Total Length when recovered: {}".format(len(cpp_graph)))
    ctr = 0
    total_nodes = int(cpp_graph[0])
    del cpp_graph[0]
    one_set = set()

    # One_set node number resetter starting from 0
    node_ids = {}
    i = 0
    for each in cpp_graph:
        l = each.split(' ')
        # the edge value in the file is ignored; weights are assigned below
        u, v, val = int(l[0].strip()), int(l[1].strip()), None
        for node in (u, v):
            if node not in one_set:
                node_ids[node] = i
                i += 1
                one_set.add(node)
        edge = (node_ids[u], node_ids[v])
        if g.get(edge, -1) == -1:
            g[edge] = val
        elif g[edge] != val:
            ctr += 1
            print(
                "(u, v): {} -I found this value before: {} and new value is: {} counter: {}"
                .format(edge, g[edge], val, ctr))

    alpha = [1]  # list of values of shape parameters
    samples = np.linspace(start=0, stop=5, num=len(g))
    x_m = 1  # scale
    output = None
    for a in alpha:
        output = np.array([pareto.pdf(x=samples, b=a, loc=0, scale=x_m)])

    # one density per edge, in dict order; zero densities get a small floor
    for each, density in zip(g, output[0]):
        g[each] = 0.002 if density == 0 else density

    c = sum(1 for each in output[0] if each >= 0.1)
    print("Total nodes: {} \nTotal Edges: {}\ncounter: {}".format(
        len(one_set), len(g), ctr))
    print(c)

    # `with` guarantees the output is flushed and closed; the handle was
    # previously left open for the rest of the run.
    with open('graph_euro_road.txt', 'w') as filer:
        filer.write(str(len(one_set)) + "\n")
        for each in g:
            u, v, val = each[0], each[1], g[each]
            if val >= 1:
                print(u, v, val)
            filer.write(" ".join([str(u), str(v), str(val), "\n"]))

    # NOTE(review): triples are drawn from one_set (original node ids) while g
    # is keyed by the renumbered ids in node_ids - confirm that this is intended.
    triangles = list(combinations(list(one_set), 3))
    for triangle in triangles:
        tri = list(combinations(triangle, 2))
        # look each side up in either orientation; skip the triple if one is missing
        sides = []
        for a_node, b_node in tri:
            if g.get((a_node, b_node), -1) != -1:
                sides.append(g[(a_node, b_node)])
            elif g.get((b_node, a_node), -1) != -1:
                sides.append(g[(b_node, a_node)])
            else:
                break
        if len(sides) != 3:
            continue
        one, two, thr = sides
        assert isinstance(one, float)
        assert isinstance(two, float)
        assert isinstance(thr, float)
        assert one + two >= thr
        assert one + thr >= two
        assert two + thr >= one

        print(tri)
예제 #32
0
파일: example.py 프로젝트: ezg/dancing_bars
    def __init__(self, seed, speed, nr_samples, interval):
        """Precompute running-mean data and build the three-panel animated figure.

        Parameters:
            seed: seed passed to ``np.random.seed``.
            speed: frame interval forwarded to ``TimedAnimation.__init__``.
            nr_samples: number of Pareto samples to draw.
            interval: step between the sample counts at which the running
                mean is evaluated (counts are 2, 2+interval, ...).
        """
        np.random.seed(seed)
        b = 3
        # np.random.pareto draws with support starting at 0; +1 shifts it to start at 1
        self.samples = (np.random.pareto(b, nr_samples) + 1)
        mean, var, skew, kurt = pareto.stats(b, moments='mvsk')
        self.gt_mean = mean

        # running mean and 1.96 * standard-error half-width for each prefix length
        self.y_values = []
        self.confidence = []
        self.x_values = range(2, nr_samples, interval)
        for i in self.x_values:
            s = self.samples[:i]
            self.y_values.append(np.mean(s))
            self.confidence.append((np.std(s) / math.sqrt(len(s))) * 1.96)

        self.y_values = np.array(self.y_values)
        self.confidence = np.array(self.confidence)

        fig = plt.figure(figsize=(10, 10))
        self.ax1 = fig.add_subplot(2, 2, (1, 2))
        self.ax2 = fig.add_subplot(2, 2, 3)
        self.ax3 = fig.add_subplot(2, 2, 4)

        # history plot
        self.ax1.set_title('dancing bar history')
        self.ax1.set_xlabel('iteration')
        self.ax1.set_ylabel('estimated mean')
        self.ax1.set_xlim(0, nr_samples)
        self.ax1.set_ylim(np.min(self.y_values - self.confidence),
                          np.max(self.y_values + self.confidence))

        self.ax1_primitives = []
        # `closed` is passed by keyword: recent Matplotlib no longer accepts it positionally
        p = Polygon(self._history_polygon_xy(1), closed=True, alpha=0.4, color='blue')
        self.ax1_primitives.append(p)
        self.ax1.add_patch(p)

        l = Line2D([], [], color='blue')
        self.ax1_primitives.append(l)
        self.ax1.add_line(l)

        # dashed reference line at the theoretical mean
        self.ax1.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)

        # bar plot
        self.ax2.set_title('dancing bar')
        self.ax2.set_ylabel('avg sales')
        self.ax2.set_xlim(-0.5, 1)
        self.ax2.set_xticks([0.25])
        self.ax2.set_xticklabels(['department XYZ'])
        self.ax2.set_ylim(0, np.max(self.y_values + self.confidence))

        self.ax2_primitives = []
        r = Rectangle((0, 0), 0.5, self.y_values[1], alpha=0.4, color='blue')
        self.ax2_primitives.append(r)
        self.ax2.add_patch(r)

        self.ax2.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)

        # vertical error bar through the middle of the bar
        l = Line2D([0.25, 0.25], [
            self.y_values[1] - self.confidence[1],
            self.y_values[1] + self.confidence[1]
        ],
                   color='black')
        self.ax2_primitives.append(l)
        self.ax2.add_line(l)

        # pdf plot
        self.ax3.set_title('pareto pdf')
        x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
        # NOTE(review): the curve is the density plus 1 (a vertical offset) - confirm
        # this is intended rather than a leftover of the +1 sample shift above.
        self.ax3.plot(x, pareto.pdf(x, b) + 1, 'blue', lw=1, alpha=0.6)

        animation.TimedAnimation.__init__(self,
                                          fig,
                                          interval=speed,
                                          blit=True,
                                          repeat=False)
예제 #33
0
from scipy.stats import pareto
# Pareto density at x=6 with shape b=3, loc=0, scale=3
print(pareto.pdf(6, b=3, loc=0, scale=3))
예제 #34
0
 def f(x):
     """Pareto density at *x* with shape ``self.ζ``, loc 0 and scale ``xm``."""
     shape = self.ζ
     return pareto.pdf(x, b=shape, loc=0, scale=xm)
 def g(x):
     """Log-normal density at *x* with shape ``σ`` and scale ``exp(μ)``."""
     scale = np.exp(μ)
     return lognorm.pdf(x, s=σ, scale=scale)