def TV(base_oracle,weights_0,weights_1,centers_0,centers_1):
    """Estimate the TV distance between two superpositions of Airy disks.

    The estimate is an importance-sampling average: batches of 2-D points are
    drawn from ``base_oracle`` (a mixture of augmented Paretos placed on both
    sets of centers), and the absolute difference of the two target densities
    is divided by the proposal density at each point.

    Parameters
    ----------
    base_oracle : callable
        Zero-argument callable returning one batch of points as a 2-D array;
        column 0 is read as x and column 1 as y.
    weights_0, weights_1 : array-like
        Mixing weights of the two superpositions (one weight per center).
    centers_0, centers_1 : sequence
        x-coordinates of the centers; all centers lie on the x-axis.

    Returns
    -------
    float
        Running average over all batches of the mean absolute density-ratio
        difference, divided by 2 (TV = L1 / 2).

    Notes
    -----
    Reads the module-level ``TOTAL_SAMPS`` and ``BATCH_SIZE`` and calls the
    module-level ``airy``. The batch count is ``TOTAL_SAMPS // BATCH_SIZE``;
    floor division keeps ``range`` valid on Python 3, where ``/`` yields a
    float.
    """
    # running counter for which batch we are processing
    counter = 0
    # running estimate for the TV distance
    result = 0

    # iterate over the batches
    for _ in range(TOTAL_SAMPS // BATCH_SIZE):
        # draw batch of samples from mixture of pareto's of size BATCH_SIZE
        points = base_oracle()

        # BATCH_SIZE x total_comps arrays consisting of distances from the pareto
        # mixture samples to all centers of the two superpositions of Airy disks
        dist_to_centers_0 = np.zeros((len(points),len(weights_0)))
        dist_to_centers_1 = np.zeros((len(points),len(weights_1)))

        # squared distance of the pareto mixture samples from the x-axis
        ysquared_diffs = points[:,1]**2

        # compute the entries of dist_to_centers_0
        for i in range(len(weights_0)):
            xdiff_to_center_i_0 = points[:,0] - centers_0[i]
            dist_to_centers_0[:,i] = np.sqrt(xdiff_to_center_i_0**2 + ysquared_diffs)

        # compute the entries of dist_to_centers_1
        for i in range(len(weights_1)):
            xdiff_to_center_i_1 = points[:,0] - centers_1[i]
            dist_to_centers_1[:,i] = np.sqrt(xdiff_to_center_i_1**2 + ysquared_diffs)

        # Boolean matrix indicating which points fall within the "augmented" part of the
        # augmented Pareto around each of the centers
        unit_interval_points = np.hstack(((dist_to_centers_0 < 1.), (dist_to_centers_1 < 1.)))

        # pareto density (with parameter 2/3) evaluated at each sample with respect to each center
        # NOTE: 2*pi comes from polar coordinates
        pareto_densities_0 = pareto.pdf(dist_to_centers_0, 2/3.) / dist_to_centers_0 * weights_0 / (2 * np.pi)
        pareto_densities_1 = pareto.pdf(dist_to_centers_1, 2/3.) / dist_to_centers_1 * weights_1 / (2 * np.pi)
        pareto_densities = np.hstack((pareto_densities_0,pareto_densities_1))

        # density of "augmented" part of the augmented Pareto evaluated at each sample with respect to each center
        unit_interval_densities_0 = 1./dist_to_centers_0 * weights_0 / (2 * np.pi)
        unit_interval_densities_1 = 1./dist_to_centers_1 * weights_1 / (2 * np.pi)
        unit_interval_densities = np.hstack((unit_interval_densities_0,unit_interval_densities_1))

        # density of the mixture of augmented paretos at each of the points
        # NOTE: factor of /2 at the end is because proposal_densities is over both sets of centers
        proposal_densities = np.sum(pareto_densities * (1 - unit_interval_points)/2. + unit_interval_densities * unit_interval_points/2.,axis=1)/2

        # densities of the two superpositions of airy disks at each of the points
        D0_densities = np.sum(airy(dist_to_centers_0) * weights_0, axis=1)
        D1_densities = np.sum(airy(dist_to_centers_1) * weights_1, axis=1)

        # absolute difference in Radon-Nikodym derivatives between the two superpositions relative to the mixture of augmented paretos
        # NOTE: normalization constant pi comes from fact that integral of J_1(sqrt(x^2+y^2))/(x^2+y^2) over R^2 is pi
        ratio_diffs = np.abs((D1_densities - D0_densities)/proposal_densities) / np.pi

        # average absolute difference across this batch
        new_av = np.average(ratio_diffs)

        # update running average
        result = (result * counter + new_av)/(counter + 1.)
        counter += 1

    # output TV estimate (note TV = L1/2, hence factor of 2)
    return result/2.
def test_plot_1():
    """Draw Pareto densities for several shape values and return the figure.

    The scale is fixed at 2 and the shapes are 1 to 4; all plotting happens
    inside a ``sciplot.style`` context using the ``en_US.UTF-8`` locale.
    """
    scale = 2
    shapes = [1, 2, 3, 4]
    title = (r'Pareto PDF'
             r' $p(x \,|\, x_\mathrm{m}, \alpha) = \frac{\alpha x_\mathrm{m}^\alpha}{x^{\alpha+1}}$'
             r' with $x_\mathrm{m}=2$')

    with sciplot.style(locale_setting='en_US.UTF-8'):
        grid = np.linspace(0, 6, 1000)
        # One row of density values per shape parameter.
        densities = np.array([pareto.pdf(grid, scale=scale, b=shape) for shape in shapes])

        sciplot.set_size_cm(7)
        fig, ax = plt.subplots(1, 1)
        fig.suptitle(title)

        curves = ax.plot(grid, densities.T)
        legend_labels = tuple(r'$\alpha=' + str(shape) + '$' for shape in shapes)
        sciplot.set_legend(ax=ax,
                           plot_tpl=curves,
                           label_tpl=legend_labels,
                           loc='upper right')

        ax.set_xlabel('$x$')
        ax.set_ylabel(r'$p(x \,|\, x_\mathrm{m}, \alpha)$')
    return fig
def real_pareto(distance_list, nodes):
    """Plot a reference Pareto density (shape 1, scale 1) via ``plot_graph``.

    The density is evaluated on ``len(distance_list)`` evenly spaced points in
    [0, 5]; only the length of ``distance_list`` is used. ``nodes`` is passed
    through to ``plot_graph`` unchanged.
    """
    shape = 1
    scale = 1
    grid = np.linspace(start=0, stop=5, num=len(distance_list))
    density = np.array([pareto.pdf(x=grid, b=shape, loc=0, scale=scale)])[0]
    plot_graph(density,
               nodes,
               'Actual Pareto (auto bin size)',
               file_name="ParetoDistn.png")
def paretoplt(alphas):
    """Plot the Pareto density (scale 1) on [0, 5] for every shape in ``alphas``.

    Parameters
    ----------
    alphas : iterable of float
        Shape parameters; one curve is drawn per value.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    import numpy as np
    from matplotlib import pyplot as plt
    from scipy.stats import pareto

    xm = 1  # scale
    x = np.linspace(0, 5, 1000)
    # The shape is passed positionally *before* the keyword argument; the
    # previous ``pareto.pdf(x, scale=xm, a)`` was a SyntaxError.
    output = np.array([pareto.pdf(x, a, scale=xm) for a in alphas])
    plt.plot(x, output.T)
    plt.show()
    return
def model(param):
    """Evaluate the model at a proposed ``(Enu, alpha)`` pair.

    ``param`` unpacks into the proposed neutrino energy and the proposed shape
    parameter. The event weights are recomputed for the proposed shape, a 1-D
    KDE over ``Enu_log`` is rebuilt with them, and the result is the product of
    ``prefactor(E_reconstruted, Enu_proposed)`` and that KDE evaluated at
    ``log10(Enu_proposed)``. Reads several module-level names (``Enu``,
    ``weights``, ``Enu_log``, ``E_reconstruted``).
    """
    proposed_energy, proposed_shape = param

    # regenerate f(Enu|alpha) at the proposed alpha
    reweighted = pareto.pdf(Enu, proposed_shape) / weights
    energy_kde = kde.KDE1D(
        Enu_log,
        weights=reweighted,
        bw_method=0.04,
        adaptive=True,
        weight_adaptive_bw=True,
        alpha=0.3,
    )

    conditional = prefactor(E_reconstruted, proposed_energy)
    marginal = energy_kde.evaluate(np.log10(proposed_energy))
    return conditional * marginal
import numpy as np from scipy.stats import pareto import matplotlib.pyplot as plt from matplotlib.patches import Rectangle from pathlib import Path import sciplot # Plot 1 with sciplot.style(theme='no-latex', locale_setting='en_US.UTF-8'): x_m = 2 # scale alpha_lst = [1, 2, 3, 4] # shape parameters x = np.linspace(0, 6, 1000) pdf = np.array([pareto.pdf(x, scale=x_m, b=a) for a in alpha_lst]) sciplot.set_size_cm(7) fig, ax = plt.subplots(1, 1) fig.suptitle( r'Pareto PDF' + r' $p(x \,|\, x_\mathrm{m}, \alpha) = \frac{\alpha x_\mathrm{m}^\alpha}{x^{\alpha+1}}$' + r' with $x_\mathrm{m}=2$') line_plot = ax.plot(x, pdf.T) label_lst = [] for alpha in alpha_lst: label_lst.append(r'$\alpha=' + str(alpha) + '$') sciplot.set_legend(ax=ax,
#!/usr/bin/env python
# Plots Pareto distribution
from scipy.stats import pareto
import numpy as np
import matplotlib.pylab as pl

# (scale m, shape k) pairs, each with its own line style and legend entry.
params = [(1, 3), (1, 2), (1, 1), (0.001, 1)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(m, k) for m, k in params]

# Evaluation grid shared by every curve.
grid = np.arange(0, 2, .01)
for (m, k), style, label in zip(params, styles, labels):
    pl.plot(grid, pareto.pdf(grid, k, scale=m), style, label=label)

pl.axis([0, 2, 0, 3])
pl.title('Pareto Distribution')
pl.legend()
pl.savefig('paretoPlot.png')
pl.show()
def pareto_likelihood(node, data=None, dtype=np.float64):
    """Return the Pareto density of ``data[:, node.scope]`` as a column array.

    The shape parameter is ``node.a``. The result has shape
    ``(data.shape[0], 1)`` and dtype ``dtype``.
    """
    from scipy.stats import pareto

    n_rows = data.shape[0]
    densities = pareto.pdf(data[:, node.scope], node.a)

    # Assigning into a pre-shaped buffer forces the (n_rows, 1) layout and dtype.
    out = np.ones((n_rows, 1), dtype=dtype)
    out[:] = densities
    return out
def _pdf(self, x, b, m):
    """Pareto density with shape ``b`` at ``x``, divided by the Pareto CDF at ``m``."""
    density = pareto.pdf(x, b)
    mass_below_m = pareto.cdf(m, b)
    return density / mass_below_m
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml
from scipy.stats import pareto

# (scale m, shape k) pairs with matching line styles and legend labels.
params = [(0.1, 1), (0.1, 2), (0.2, 1), (0.2, 2)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(m, k) for m, k in params]

x = np.linspace(0, 1, 1000)
# The same curves are drawn twice (linear and log-log), so evaluate them once.
curves = [pareto.pdf(x, k, scale=m) for m, k in params]

# Linear-scale figure, saved through pyprobml_utils.
for curve, style, label in zip(curves, styles, labels):
    plt.plot(x, curve, style, label=label)
plt.title('Pareto Distribution')
plt.legend()
plt.axis((0.0, 0.5, 0, 20))
pml.savefig('paretoPdf.pdf')
plt.show()

# Log-log figure of the same densities.
for curve, style, label in zip(curves, styles, labels):
    plt.loglog(x, curve, style, label=label)
plt.xlim(0.05, 1)
plt.title('Log Pareto Distribution')
mpl.rcParams['axes.color_cycle'] = ['k', 'k', 'k', 'k', 'k', 'k'] import matplotlib.pyplot as plt f, ax = plt.subplots(2, 2, figsize=(2 * 0.8 * 4, 2 * 0.8 * 3)) data = loadtxt( "/home/marius/Dokumenter/fys4150/batmobile/resources/data_2.000000_111.txt" ) data = data.reshape(data.shape[0] * data.shape[1]) ax[0][0].hist(data, bins=linspace(0, 100, 1000), normed=True, facecolor="white") m = linspace(0, 10, 1000) ax[0][0].plot(m, pareto.pdf(m, 3.5, loc=-1), 'k--', label=ur"$a = 3.5$", linewidth=1.5) ax[0][0].set_xlim(0, 3) ax[0][0].legend(fontsize=11) data = loadtxt( "/home/marius/Dokumenter/fys4150/batmobile/resources/data_1.500000_351.txt" ) data = data.reshape(data.shape[0] * data.shape[1]) ax[0][1].hist(data, bins=linspace(0, 100, 800), normed=True, facecolor="white") m = linspace(0, 10, 1000) ax[0][1].plot(m, pareto.pdf(m, 2.5, loc=-1), 'k--',
# In[43]:

# Choice of the distribution parameter
k = 10

# Draw a sample of size 1000 from the distribution
sampleRange = pareto.rvs(k, size=1000)

# Plot the histogram of the sample and draw the theoretical density of the
# random variable on top of it.
# ``density=True`` replaces the ``normed=True`` keyword, which newer Matplotlib
# releases no longer accept.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples pareto')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# theoretical density of the random variable, between its 1% and 99% quantiles
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')


# In[57]:

# Histogram of the distribution of sample means
# (the original note also mentions the density of the matching normal
# distribution; that part is not in the visible code)
def paretoF(sizeSamples, Ex, Dx):
    """Plot a normalised histogram of 1000 sample means of size ``sizeSamples``.

    Each sample is drawn from the Pareto law with the module-level shape ``k``.
    ``Ex`` and ``Dx`` are not used by the visible body.
    NOTE(review): presumably the mean/variance for a normal overlay - confirm.
    """
    n = sizeSamples
    # generate the samples: 1000 rows of n draws each
    values = np.array([pareto.rvs(k, size=n) for _ in range(1000)])
    # compute the sample means
    meanVal = values.mean(axis=1)
    plt.hist(meanVal, density=True, alpha=0.5, label='hist mean n ' + str(n))
# + p_bar = 0.1 # probability threshold p_quant = r_[arange(10**-4, p_bar + 10**-4, 10**-4), arange(p_bar + 0.001, 1.001, 0.001)].reshape( 1, -1) # quantile probability levels q_HFP = HFPquantile(y, p_quant, p) y_bar = q_HFP[p_quant == p_bar] # threshold # data below the threshold l_1 = where(y[0] < y_bar)[0] l_2 = where(p_quant[0] <= p_bar)[0] y_ex = y_bar - y[[0], l_1] # dataset of the conditional excess distribution csi_MLFP, sigma_MLFP = FitGenParetoMLFP( y_ex, p[0, l_1] ) # Maximum Likelihood optimization with Generalized Pareto Distribution f_MLFP = pareto.pdf(sort(y_ex), csi_MLFP, sigma_MLFP, 0) # estimated pdf q_MLFP, *_ = QuantileGenParetoMLFP(y_bar, p_bar, csi_MLFP, sigma_MLFP, p_quant[0, l_2]) # MLFP-quantile q_bt = q_HFP[0, l_2] # historical quantile below the threshold # - # ## Generate figures showing the unconditional MLFP-mean and standard deviation and the estimated unconditional quantile function # + typ = namedtuple('type', 'Entropy') typ.Entropy = 'Exp' ens = EffectiveScenarios(p, typ) option = namedtuple('option', 'n_bins')
unique, counts = np.unique(data, return_counts=True) if n > 10: max_value = np.amax(counts) print(max_value) plt.plot(unique, counts * 5 / max_value, label="Pareto EPMF{h}".format(h=i + 1)) else: plt.plot(unique, counts, label="Pareto EPMF{h}".format(h=i + 1)) return data [a, xm] = [5, 1] ddata = poligon_par(5) plt.plot(np.unique(ddata), pareto.pdf(np.unique(ddata), a, scale=xm), color="black", lw=4, label="Pareto PMF") plt.legend(loc='best', frameon=False) plt.show() ddata = poligon_par(10) plt.plot(np.unique(ddata), pareto.pdf(np.unique(ddata), a, scale=xm), color="black", lw=4, label="Pareto PMF") plt.legend(loc='best', frameon=False) plt.show() ddata = poligon_par(100) plt.plot(np.unique(ddata),
pylab.ylim(-50, 50)
pylab.xlim(-5, 5)
pylab.xlabel('$x$')
pylab.legend()
pylab.show()

# Pareto
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pareto

x_m = 1  # scale
alpha = [1, 2]  # list of values of shape parameters
samples = np.linspace(start=0, stop=5, num=1000)  # the sample grid
for a in alpha:
    output = np.array([pareto.pdf(x=samples, b=a, loc=0, scale=x_m)])
    plt.plot(samples, output.T, label='alpha {0}'.format(a))

plt.xlabel('samples', fontsize=15)
plt.ylabel('PDF', fontsize=15)
plt.title('Probability Density function', fontsize=15)
# The on/off flag is passed positionally: its keyword was renamed from ``b`` to
# ``visible`` in newer Matplotlib, so the positional form works on both.
plt.grid(True, color='grey', alpha=0.3, linestyle='-.', linewidth=2)
plt.rcParams["figure.figsize"] = [5, 5]
plt.legend(loc='best')
plt.show()

# Exercise 3 -- Cuberschi Lucian
# (Random variables, implementation)
# Suppose you measure the temperature 10 consecutive days with a thermometer that has a small random error.
# 1. What is the mean temperature, knowing that the mean error is +1°C and the measurements are those in the variable Y below.
# Build round(sqrt(n)) equal-width bins starting at ``mi``; edges are rounded
# to 3 decimals and appended to the existing ``arr_x`` list.
n_bins = round(sqrt(n))
step = (ma - mi) / sqrt(n)
beg = round(mi, 3)
arr_x.append(beg)
for _ in range(n_bins):
    beg += step
    arr_x.append(round(beg, 3))

# Count the observations in each bin (both edges inclusive).
arr_y = [
    sum(1 for element in data if arr_x[j] <= element <= arr_x[j + 1])
    for j in range(n_bins)
]
# Drop the last edge so there is exactly one x value per bin.
arr_x.pop()

# Rescale the counts so the tallest bin has height ``a``.
maximum = max(arr_y)
arrr_y = [count * a / maximum for count in arr_y]

x = np.linspace(0, 5, 1000)
par_arr = np.array(pareto.pdf(x, scale=xm, b=a))

fig = plt.figure()
ax = fig.add_subplot()
plt.hist(arr_x,
         weights=arrr_y,
         bins=n_bins,
         label="Pareto pdf simulation")
plt.plot(x, par_arr, color="r", label="Pareto pdf")
plt.legend(loc='best', frameon=False)
plt.show()
from scipy.stats import pareto

# Density at x = 6 of a Pareto law with shape 3, location 0 and scale 3.
density = pareto.pdf(6, 3, loc=0, scale=3)
print(density)
def expected(x, i = -1):
    """Return the Pareto density with shape 1 at ``x``; ``i`` is ignored."""
    shape = 1
    density = pareto.pdf(x, shape)
    return density
from scipy.stats import norm
from scipy.stats import pareto


def multiply_plot(pdf1, title1, pdf2, title2):
    """Plot two densities and their pointwise product on a 2x2 grid.

    The two inputs go on the top row and the product spans the bottom row,
    titled ``'Success = <title1> x <title2>'``. All axis ticks are removed.

    Returns the product as a callable, together with its combined name.
    """
    def prod_pdf(z):
        return pdf1(z) * pdf2(z)

    prod_name = title1 + ' x ' + title2
    x = np.linspace(0, 10, 100)
    plt.figure(figsize=(5, 7))

    # (grid cell, column span, curve, title) for each of the three panels.
    panels = [
        ((0, 0), 1, pdf1, title1),
        ((0, 1), 1, pdf2, title2),
        ((1, 0), 2, prod_pdf, 'Success = ' + prod_name),
    ]
    axes = []
    for cell, span, curve, heading in panels:
        panel = plt.subplot2grid((2, 2), cell, colspan=span)
        panel.plot(x, curve(x))
        panel.set_title(heading)
        axes.append(panel)

    for panel in axes:
        panel.set_xticks([])
        panel.set_yticks([])
    return prod_pdf, prod_name


def norm_pdf(x):
    """Normal density with mean 5 and standard deviation 2."""
    return norm.pdf(x, 5, 2)


prod_pdf, prod_name = multiply_plot(norm_pdf, 'Talent', norm_pdf, 'Effort')
prod_pdf, prod_name = multiply_plot(prod_pdf, prod_name, norm_pdf, 'Beauty')


def pareto_pdf(x):
    """Pareto density with shape 5 and location 2."""
    return pareto.pdf(x, 5, 2)


prod_pdf, prod_name = multiply_plot(prod_pdf, prod_name, pareto_pdf,
                                    'IG Followers')
from scipy.stats import pareto
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# First four moments of the distribution for the chosen shape.
b = 2.62
mean, var, skew, kurt = pareto.stats(b, moments='mvsk')

# Plot the density between the 1% and 99% quantiles.
lower, upper = pareto.ppf(0.01, b), pareto.ppf(0.99, b)
x = np.linspace(lower, upper, 100)
ax.plot(x, pareto.pdf(x, b),
        'r-', lw=5, alpha=0.6, label='pareto pdf')

# Calling the distribution object fixes its parameters and returns a "frozen"
# random variable; its pdf is drawn on top of the first curve.
rv = pareto(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Round-trip check: cdf(ppf(q)) should reproduce q.
levels = [0.001, 0.5, 0.999]
vals = pareto.ppf(levels, b)
np.allclose(levels, pareto.cdf(vals, b))
# True

# Generate random numbers:
# plt.xlabel('Confidence intervals values for the variance')
# plt.yticks([])
# plt.legend()
# plt.show()

# histogram 3 for Pareto
k = [2.05, 2.5, 3, 4]
mean_list = []
var_list = []
E_list = []
V_list = []
dic = {"k:2.05": [], "k:2.5": [], "k:3": [], "k:4": []}

for i in k:
    # Drawn but not used below; kept so the random stream is consumed as before.
    U = np.random.uniform(0.0, 0.1, 10000)
    par1, E, Var = funcPareto.pareto(i, 10000)
    ppf = np.linspace(pareto.ppf(0.01, i), pareto.ppf(0.99, i), 100)
    pdf = pareto.pdf(ppf, i)
    # comparison(par1, ppf-1, pdf, "Pareto comparison for k = {0}".format(i))
    E_list.append(E)
    V_list.append(Var)
    mean_list.append(np.mean(par1))
    var_list.append(np.var(par1))

# One table column per shape: the two values returned by funcPareto, then the
# sample mean and sample variance.
for position, shape in enumerate(k):
    dic["k:{}".format(shape)] = [
        E_list[position],
        V_list[position],
        mean_list[position],
        var_list[position],
    ]

df = pd.DataFrame(
    dic,
    index=["Mean", "Variance", "Mean_analytical", "Variance_analytical"])
print(df)
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pareto

# Pareto shape (a) and scale (xm) used for both sampling and the reference curve.
[a, xm] = [5, 1]
# Sample sizes to try.
N = [5, 10, 100, 1000, 10**5]
fig = plt.figure()
ax = fig.add_subplot()
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# source - confirm which loop the plot/legend/show calls belong to. Also note
# that fig/ax are created once, before the loops.
for n in N:
    # Five independent samples of size n, each drawn as a histogram.
    for i in range(5):
        data = np.zeros(n)
        for iteration in range(n):
            # Transform a uniform draw on [0, 1) into a Pareto draw:
            # r = xm / U**(1/a).
            xi = np.random.rand()
            r = xm / xi ** (1 / a)
            data[iteration] = r
        # Round to 2 decimals so that equal values can be counted.
        data = np.around(data, decimals=2)
        unique, counts = np.unique(data, return_counts=True)
        ax.hist(unique, bins=int(len(unique)), weights=counts, label="Pareto EHMF{h}".format(h=i + 1), alpha=0.7,
                density=True)
    # Reference density evaluated at the distinct values of the last sample.
    plt.plot(np.unique(data), pareto.pdf(np.unique(data), a, scale=xm), '-.', color="black", lw=1, label="Pareto PMF")
    plt.legend(loc='best')
    plt.show()
k = 10
x_m = 1

# In[129]:

# Draw a sample of size 1000 from the distribution
sampleRange = paretoF(1000)

# Plot the histogram of the sample and draw the theoretical density of the
# random variable on top of it.
# ``density=True`` replaces the ``normed=True`` keyword, which newer Matplotlib
# releases no longer accept.
plt.hist(sampleRange, density=True, bins=20, alpha=0.5, label='hist samples')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# theoretical density of the random variable, between its 1% and 99% quantiles
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')

# In[150]:

# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print values
# plt.hist(values.mean(axis=1), normed=True)

# Means of 20 independent samples of size 1000.
# ``range`` replaces the Python-2-only ``xrange`` and works on both versions.
m = [np.mean(pareto.rvs(k, size=1000)) for _ in range(20)]
# plt.hist(m, normed=True, alpha=0.5, label='hist samples')

# Mean of the distribution as reported by scipy.
mean = pareto.mean(k)
EX = mean
from scipy.stats import pareto
import matplotlib.pyplot as plt

# Traffic parameters for the burst / idle computation below.
burst_time = 500
idle_time = 500
rate = 200
packetSize = 210
shape = 0.5

interval = (packetSize * 8) / rate
burstlen = burst_time / interval

# NOTE(review): the pdf is evaluated at non-positive arguments (-52*i and
# -22*i), which lie outside the support of scipy's Pareto with default loc and
# scale - confirm that a density, rather than a random draw, was intended.
for i in range(33):
    ## b1 = (burstlen * (shape - 1)) / shape
    ## b2 = (idle_time * (shape - 1)) / shape
    ## print('B1: {} ||| B2: {}'.format(b1, b2))

    ## next burst length: rounded to the nearest integer, never below 1
    burstlen = int(pareto.pdf(-52 * i, shape) + 0.5) or 1
    print('BurstLen {}: {}'.format(i, burstlen))

    ## next idle time
    idle_time = pareto.pdf(-22 * i, shape)
    print('IdleTime {}: {}'.format(i, idle_time))
print(data)
fig, ax = plt.subplots()

# Histogram with one bin per 100 ms, plus a final edge at the largest value.
largest = max(data)
bins = list(range(0, largest, 100)) + [largest]
print(bins)
plt.hist(data, bins=bins, density=True, facecolor='green', alpha=1)

# Fit a Pareto to the data.
# shape is b (alpha in wikipedia), scale is x (x_b in wikipedia)
shape, loc, scale = pareto.fit(data)
y = pareto.pdf(bins, shape, loc=loc, scale=scale)

# Draw the fitted density over the histogram, evaluated at the bin edges.
l = plt.plot(bins, y, 'r--', linewidth=2)

tick_positions = (0, 500, 1000, 1500) + tuple(range(2500, largest, 5000))
plt.xticks(tick_positions, rotation=90)
ax.grid(alpha=0.3)

# Axis labels and title.
plt.xlabel('Miliseconds')
plt.ylabel('Probability')
plt.title(
    "Histogram of %d circuit timeout values fitted against Pareto with shape=%.3f, loc=%.3f and scale=%.3f"
    % (len(data), shape, loc, scale))
plt.grid(True)
# PART C: OTHER DISTRIBUTIONS
N = 500
xn = np.linspace(0, 50, N)
xc = np.linspace(0, 100, 100)

# Pareto
alpha = [1.16]
loc = 0
scale = 1
for i in alpha:
    # ``scale`` is passed explicitly so the pdf and cdf use the same parameters.
    pdf = pareto.pdf(xn, i, loc, scale)

plt.figure('Pareto PDF')
plt.title('Pareto PDF')
# x and y are passed by keyword: newer seaborn releases reject positional data
# vectors in lineplot.
ax = sns.lineplot(x=xn, y=pdf, color='k')
ax.fill_between(xn, pdf, color='olivedrab', alpha=0.2)

cdf = pareto.cdf(xc, alpha[0], loc, scale)
plt.figure('Pareto CDF')
plt.title('Pareto CDF')
ax = sns.lineplot(x=xc, y=cdf, color='red')
ax.fill_between(xc, cdf, color="firebrick", alpha=0.3)

# Gamma (with a = 1 it is an exponential)
a = [1, 3, 5]
# create a log data set idx = np.where(Erec > 0.0)[0] Enu_log = np.log10(np.take(Enu, idx)) Erec_log = np.log10(np.take(Erec, idx)) # trim the non-log data down to size weights = np.take(weights, idx) Enu = np.take(Enu, idx) ''' Construct the function f(Erec|Enu) = f(Erec, Enu|alpha) / f(Enu|alpha) ''' # weights don't actually matter for this calculation a = 1.0 wt = pareto.pdf(Enu, a) / weights # 1D KDE to compute the denominator f(Enu|alpha) kernel1D_log = kde.KDE1D(Enu_log, weights=wt, bw_method=0.04, adaptive=True, weight_adaptive_bw=True, alpha=0.3) # 2D KDE to compute the numerator f(Erec, Enu|alpha) points = np.vstack([Erec_log, Enu_log]) kernel2D_log = gaussian_kde(points, weights=wt, bw_method=0.06, adaptive=True, weight_adaptive_bw=True, alpha=0.3) # funtion to return the entire prefactor term f(Erec|Enu) evaluated at E_reconstructed and Enu_proposed def prefactor(E_reconstruted, Enu_proposed): v = np.vstack([E_reconstruted, Enu_proposed]) return kernel2D_log.evaluate(np.log10(v), adaptive=True) / kernel1D_log(np.log10(Enu_proposed))
n = 10000
beta = 1
k1 = 2.05; k2 = 2.5; k3 = 3; k4 = 4

# First k
res31, mean1, var1 = paretobay(beta,k1,n)
# Sample mean and variance of the generated values (printed as "analytical").
anamean1 = np.mean(res31)
anavar1 = np.var(res31)

x1 = np.linspace(pareto.ppf(0.01, k1),pareto.ppf(0.9999,k1),100)
plt.figure()
plt.hist(res31,align='mid',color='tan',edgecolor='moccasin',bins=20,density=True,stacked=True)
# Remember the histogram's axis limits so the density overlay cannot rescale it.
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
# The curve is drawn at x1 - 1, i.e. shifted one unit to the left.
plt.plot(x1-1, pareto.pdf(x1, k1),'g-', lw=2,alpha=0.6)
plt.ylim(ymin,ymax)
plt.title("Pareto Distributed Histogram (k=2.05)")
plt.xlabel("Classes")
plt.ylabel("Density")
# ``plt.show`` was referenced without being called, so nothing was displayed.
plt.show()

print('----Pareto with K = 2.05----')
print('The theoretical mean is: {0}'.format(mean1))
print('The theoretical variance is: {0}'.format(var1))
print('The analytical mean is: {0}'.format(anamean1))
print('The analytical variance is: {0}'.format(anavar1))

# Second k
res32, mean2, var2 = paretobay(beta,k2,n)
anamean2 = np.mean(res32)
anavar2 = np.var(res32)
#!/usr/bin/env python
# Plots Pareto distribution
import matplotlib.pyplot as pl
import numpy as np
from scipy.stats import pareto

# (scale m, shape k) pairs, each with its own line style and legend entry.
params = [(1, 3), (1, 2), (1, 1), (0.001, 1)]
styles = ['b-', 'r:', 'k-.', 'g--']
labels = ['m={:.2f}, k={:.2f}'.format(m, k) for m, k in params]

# Evaluation grid shared by every curve.
grid = np.arange(0, 2, .01)
for (m, k), style, label in zip(params, styles, labels):
    pl.plot(grid, pareto.pdf(grid, k, scale=m), style, label=label)

pl.axis([0, 2, 0, 3])
pl.title('Pareto Distribution')
pl.legend()
pl.savefig('paretoPlot.png')
pl.show()
def make_data_pen(uu_label, idx):
    """Build and save a long-tailed train/test split of PENDIGITS.

    Reads ``PENDIGITS.csv`` (first column is the label), relabels every class
    in ``uu_label`` as 99, makes an 80/20 split, and subsamples the remaining
    training classes with randomly assigned Pareto-shaped keep ratios. Both
    splits are saved under ``pendigits/`` with ``len(uu_label)`` and ``idx`` in
    the file name. Reads the module-level ``ratio`` and ``uu_label_test``.
    """
    # Generate PENDIGITS-LT
    table = pd.read_csv('PENDIGITS.csv').to_numpy()
    x = table[..., 1:]
    y = table[..., 0]
    print(y.shape)

    n_classes = 10
    n_kept = n_classes - len(uu_label)
    wt = np.linspace(1, ratio**0.5, n_kept)

    # Collapse every unseen class onto the placeholder label 99.
    y = y.astype(int)
    for unseen_label in uu_label:
        y = np.where(y == unseen_label, 99, y)

    old_x_train, x_test, old_y_train, y_test = train_test_split(x,
                                                                y,
                                                                test_size=0.2)
    keep_mask = old_y_train != uu_label_test
    x_train = old_x_train[keep_mask]
    y_train = old_y_train[old_y_train != uu_label_test]

    # Per-class keep ratios: Pareto(b=1) densities, randomly assigned to classes.
    class_counts = Counter(y_train)
    kept_classes = [label for label in range(0, 10) if label not in uu_label]
    keep_ratios = list(pareto.pdf(wt, b=1))
    random.shuffle(keep_ratios)
    strategy = {
        label: floor(class_counts[label] * keep_ratios.pop())
        for label in kept_classes
    }

    y_train = y_train.astype("int")
    x_res, y_res = make_imbalance(x_train,
                                  y_train,
                                  sampling_strategy=strategy)

    # Shuffle the test set, keeping features and labels paired.
    paired = list(zip(x_test, y_test))
    random.shuffle(paired)
    x_test, y_test = zip(*paired)
    x_test = np.array(x_test)
    y_test = np.array(y_test)

    # A single batch: the label is appended as the last column of each split.
    train = np.hstack((x_res, y_res.reshape(-1, 1)))
    test = np.hstack((x_test, y_test.reshape(-1, 1)))
    train_list = np.array([train])
    test_list = np.array([test])

    np.save(
        "pendigits/{}-{} pendigits_train.npy".format(len(uu_label), idx),
        train_list)
    np.save("pendigits/{}-{} pendigits_test.npy".format(len(uu_label), idx),
            test_list)
def main():
    """Assign Pareto-shaped weights to a road graph and check triangle inequalities.

    Steps:
      1. Read an edge list from ``road-chesapeake.mtx`` in the current working
         directory; the first line must be an integer and is then discarded.
      2. Renumber nodes from 0 in order of first appearance.
      3. Give every edge a weight taken from a Pareto density (shape 1,
         scale 1) sampled on [0, 5]; zero densities are replaced by 0.002.
      4. Write the weighted edge list to ``graph_euro_road.txt``.
      5. For every node triple whose three edges all exist, assert that the
         weights satisfy the triangle inequality and print the triple.

    The output file is written inside a ``with`` block so it is closed even if
    a later assertion fails.

    NOTE(review): the loop nesting was reconstructed from a whitespace-collapsed
    source; in particular, every edge is assumed to be written to the output
    file, not only those with weight >= 1 - confirm.
    """
    # file_name = "road-minnesota.mtx"
    # file_name = "road-euroroad.edges"
    file_name = "road-chesapeake.mtx"
    # with open(os.path.join(os.getcwd(), "cpp", "graph_" + str(order_val) + ".txt"), "r") as f:
    with open(os.path.join(os.getcwd(), file_name), "r") as f:
        cpp_graph = f.readlines()

    g = {}
    print("Total Length when recovered: {}".format(len(cpp_graph)))
    ctr = 0
    # Header line: parsed (so a malformed header still fails) and then dropped.
    total_nodes = int(cpp_graph[0])
    del cpp_graph[0]

    one_set = set()
    # One_set node number resetter starting from 0
    node_ids = {}
    for each in cpp_graph:
        fields = each.split(' ')
        # u, v, val = int(l[0].strip()), int(l[1].strip()), float(l[-1].strip())
        u, v, val = int(fields[0].strip()), int(fields[1].strip()), None
        for node in (u, v):
            if node not in one_set:
                node_ids[node] = len(node_ids)
                one_set.add(node)
        edge = (node_ids[u], node_ids[v])
        if g.get(edge, -1) == -1:
            g[edge] = val
        elif g[edge] != val:
            ctr += 1
            print(
                "(u, v): {} -I found this value before: {} and new value is: {} counter: {}"
                .format(edge, g[edge], val, ctr))

    # full_mat = graph_maker.out
    alpha = [1]  # list of values of shape parameters
    samples = np.linspace(start=0, stop=5, num=len(g))
    x_m = 1  # scale
    output = None
    for a in alpha:
        output = np.array([pareto.pdf(x=samples, b=a, loc=0, scale=x_m)])

    # One density value per edge, in insertion order; zeros become 0.002.
    for edge, density in zip(g, output[0]):
        g[edge] = 0.002 if density == 0 else density

    c = sum(1 for density in output[0] if density >= 0.1)
    print("Total nodes: {} \nTotal Edges: {}\ncounter: {}".format(
        len(one_set), len(g), ctr))
    print(c)

    with open('graph_euro_road.txt', 'w') as filer:
        filer.write(str(len(one_set)) + "\n")
        for (u, v), val in g.items():
            if val >= 1:
                print(u, v, val)
            filer.write(" ".join([str(u), str(v), str(val), "\n"]))

    def edge_weight(a, b):
        """Return the weight stored for edge (a, b) in either direction, or None."""
        if g.get((a, b), -1) != -1:
            return g[(a, b)]
        if g.get((b, a), -1) != -1:
            return g[(b, a)]
        return None

    triangles = list(combinations(list(one_set), 3))
    for triangle in triangles:
        tri = list(combinations(triangle, 2))
        one = edge_weight(*tri[0])
        two = edge_weight(*tri[1])
        thr = edge_weight(*tri[2])
        # Skip triples that do not form a triangle in the graph.
        if one is None or two is None or thr is None:
            continue
        assert isinstance(one, float)
        assert isinstance(two, float)
        assert isinstance(thr, float)
        assert one + two >= thr
        assert one + thr >= two
        assert two + thr >= one
        print(tri)
def __init__(self, seed, speed, nr_samples, interval):
    """Precompute running-mean statistics and set up the three animation panels.

    Args:
        seed: seed passed to ``np.random.seed``.
        speed: forwarded to ``TimedAnimation`` as its ``interval``.
        nr_samples: number of samples to draw.
        interval: step between the prefix lengths at which the running mean
            is evaluated.
    """
    np.random.seed(seed)
    b = 3
    # NOTE(review): the +1 presumably shifts numpy's pareto draws onto the
    # support used by scipy.stats.pareto - confirm.
    self.samples = (np.random.pareto(b, nr_samples) + 1)
    mean, var, skew, kurt = pareto.stats(b, moments='mvsk')
    self.gt_mean = mean
    self.y_values = []
    self.confidence = []
    # Prefix lengths at which the running estimate is computed.
    self.x_values = range(2, nr_samples, interval)
    for i in self.x_values:
        s = self.samples[:i]
        self.y_values.append(np.mean(s))
        # Standard error of the prefix mean times 1.96
        # (presumably a 95% normal confidence half-width - confirm).
        self.confidence.append((np.std(s) / math.sqrt(len(s))) * 1.96)
    self.y_values = np.array(self.y_values)
    self.confidence = np.array(self.confidence)
    fig = plt.figure(figsize=(10, 10))
    # Top row spans both columns; the bottom row holds two panels.
    self.ax1 = fig.add_subplot(2, 2, (1, 2))
    self.ax2 = fig.add_subplot(2, 2, 3)
    self.ax3 = fig.add_subplot(2, 2, 4)
    # history plot
    self.ax1.set_title('dancing bar history')
    self.ax1.set_xlabel('iteration')
    self.ax1.set_ylabel('estimated mean')
    self.ax1.set_xlim(0, nr_samples)
    self.ax1.set_ylim(np.min(self.y_values - self.confidence),
                      np.max(self.y_values + self.confidence))
    # Artists on ax1: a polygon patch and a line, both kept in ax1_primitives.
    self.ax1_primitives = []
    p = Polygon(self._history_polygon_xy(1), True, alpha=0.4, color='blue')
    self.ax1_primitives.append(p)
    self.ax1.add_patch(p)
    l = Line2D([], [], color='blue')
    self.ax1_primitives.append(l)
    self.ax1.add_line(l)
    # Dashed reference line at the mean reported by scipy.
    self.ax1.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)
    # bar plot
    self.ax2.set_title('dancing bar')
    self.ax2.set_ylabel('avg sales')
    self.ax2.set_xlim(-0.5, 1)
    self.ax2.set_xticks([0.25])
    self.ax2.set_xticklabels(['department XYZ'])
    self.ax2.set_ylim(0, np.max(self.y_values + self.confidence))
    # Artists on ax2: the bar rectangle and its error-bar line, both starting
    # from the estimate at index 1.
    self.ax2_primitives = []
    r = Rectangle((0, 0), 0.5, self.y_values[1], alpha=0.4, color='blue')
    self.ax2_primitives.append(r)
    self.ax2.add_patch(r)
    self.ax2.axhline(y=mean, color='black', linestyle='--', linewidth=0.5)
    l = Line2D([0.25, 0.25], [
        self.y_values[1] - self.confidence[1],
        self.y_values[1] + self.confidence[1]
    ],
               color='black')
    self.ax2_primitives.append(l)
    self.ax2.add_line(l)
    # pdf plot
    self.ax3.set_title('pareto pdf')
    x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
    # NOTE(review): 1 is added to the density values (not to x), which lifts
    # the whole curve by one unit - confirm this is intended.
    self.ax3.plot(x, pareto.pdf(x, b) + 1, 'blue', lw=1, alpha=0.6)
    animation.TimedAnimation.__init__(self,
                                      fig,
                                      interval=speed,
                                      blit=True,
                                      repeat=False)
from scipy.stats import pareto

# Density at x = 6 of a Pareto law with shape 3, location 0 and scale 3.
density = pareto.pdf(6, 3, loc=0, scale=3)
print(density)
def f(x):
    """Pareto density at ``x`` with shape ``self.ζ``, location 0 and scale ``xm``."""
    density = pareto.pdf(x, b=self.ζ, loc=0, scale=xm)
    return density


def g(x):
    """Log-normal density at ``x`` with shape ``σ`` and scale ``exp(μ)``."""
    density = lognorm.pdf(x, s=σ, scale=np.exp(μ))
    return density