def foc(gov_policies, psi, sig, start=0, end=10): # Initialize local variables for government policies for better # readability. tax = gov_policies[0] trans = gov_policies[1] result = [] # Compute different parts of first FOC (respect tax rate) and combine part_a = integrate.quad( lambda w: dell_u_tax(w, cons(w, tax, psi, trans), hours(w, tax, psi), psi, tax) * lognorm.pdf( w, s=sig, scale=np.exp(-sig**2 / 2)), 0, 10 # set integration borders )[0] part_b = integrate.quad( lambda w: w * hours(w, tax, psi), start, end )[0] # Compute first part of the second FOC (respect transfers) part_c = integrate.quad( lambda w: lognorm.pdf(w, s=sig, scale=np.exp(-sig**2 / 2)) * dell_u_trans(cons(w, tax, psi, trans), hours(w, tax, psi), psi), start, end )[0] # Store first foc in results vector result.append(part_a + part_c * part_b) # Compute budget constraint bud_const = trans - integrate.quad( lambda w: tax * w * hours(w, tax, psi), start, end )[0] result.append(bud_const) return result
def MakeLNSmoother(maxn, offset, width): offset = np.log(offset) nrmf = sum([lognorm.pdf(i-maxn,offset+maxn,width) for i in range(-int(maxn*max(width,1)*4),int(maxn*max(width,1)*4)) if (i-maxn) > 0]) smoother = np.zeros(2*maxn) for k in range(1,maxn): smoother[k+maxn] = lognorm.pdf(k,offset,width)/nrmf return {'MaxN':maxn, 'Smoother':smoother}
def Delta(): sigma=1 func1= lambda xx: norm.pdf(xx,0,sigma) #func2= lambda xx: norm.pdf(xx,0,0.1) func3= lambda z: lognorm.pdf(z,1) #print 1 - integrate.quad(func1,-4*sigma,4*sigma)[0] print 1 - integrate.quad(func3,0,10)[0] x=arange(-1,10,0.1) #y1=map(func1,x) #y2=map(func2,x) y3=map(func3,x) #plt.plot(x,y1) #plt.plot(x,y2) #plt.plot(x,y3) #plt.show() jeffreys= lambda z: 1/math.sqrt(z+1.5) jeffreys2= lambda z: 1/math.sqrt(z+15) xxx=arange(0,2.6,0.1) y=map(jeffreys,xxx) yy=map(jeffreys2,xxx) plot1, =plt.plot(xxx,y) plot2, =plt.plot(xxx,yy,'r') plt.xlabel('Signal strength') plt.ylabel('Jeffreys prior') plt.legend([plot1,plot2],['s=1.0, b=1.5','s=1.0, b=15.0']) #plt.show() print jeffreys(0)/jeffreys(1)
def familiarity(self, Ytest, ytrmean = None, ytrstd=None, sigma2=None): #def my_logpdf(y, ymean, yvar): # import numpy as np # N = y.shape[0] # ln_det_cov = N * np.log(yvar) # return -0.5 * (np.sum((y - ymean) ** 2 / yvar) + ln_det_cov + N * np.log(2. * np.pi)) #from scipy.stats import multivariate_normal #var = multivariate_normal(mean=[0,0], cov=[[1,0],[0,1]]) #var.pdf([1,0]) from scipy.stats import lognorm assert(self.type == 'bgplvm') import numpy as np N = Ytest.shape[0] if ytrmean is not None: Ytest -= ytrmean Ytest /= ytrstd qx, mm = self.model.infer_newX(Ytest) #ymean, yvar = model._raw_predict(qx) # This causes the code to hang!!! Replace qx with qx.mean.values...!!!! ymean, yvar = self.model.predict(qx) ll = np.zeros(N) for j in range(N): #ll[j] = my_logpdf(Ytest[j], ymean[j], yvar[j]) #ll[j] = multivariate_normal(mean=ymean[j], cov=np.diag(yvar[j])).pdf(Ytest[j]) ll[j] = lognorm.pdf(Ytest[j], s=1, loc=ymean[j], scale=yvar[j]).mean() loglike = ll.mean() return loglike
def plot_histogram(data_sets, attribute_names, range, fit_dist='', ylabel='', title='',
                   bins=20, normed=1, histtype='bar', facecolor='#0099FF', log=0):
    data_to_plot = retrieve_attributes_by_name(data_sets, attribute_names)
    n, bins, patches = plt.hist(data_to_plot[0], bins=bins, range=range, normed=1,
                                histtype=histtype, facecolor='#0099FF', log=0)
    if fit_dist == 'normal':
        mu, sigma = data_to_plot.mean(), data_to_plot.std()  # sample mean and sample standard deviation
        y = mlab.normpdf(bins, mu, sigma)                    # corresponding normal density
        l = plt.plot(bins, y, 'r--', linewidth=1)            # plot the distribution
    if fit_dist == 'uniform':
        uniform_pdf = 1.0 / (range[1] - range[0])  # U_pdf(x) := 1 / (b - a), horizontal line with (a, b) = range
        l = plt.plot(range, (uniform_pdf, uniform_pdf), 'r-', linewidth=2)  # plot the uniform pdf
    if fit_dist == 'exponential':
        k = data_to_plot.mean()  # the sample mean is the MLE of the scale (1/lambda) of the exp. dist.
        x = np.linspace(expon.ppf(0.01, scale=k), expon.ppf(0.99, scale=k), 100)
        l = plt.plot(x, expon.pdf(x, scale=k), 'r--', linewidth=1)
    if fit_dist == 'lognormal':
        data_to_plot[np.where(data_to_plot == 0)] = 0.0001
        x = np.log(data_to_plot)
        print x
        mu, sigma = x.mean(), x.std()
        print '%f %f' % (mu, sigma)
        # scipy's lognorm takes the log-space sigma as the shape and exp(mu) as the scale; loc stays 0
        y = lognorm.pdf(x=bins, s=sigma, loc=0, scale=math.exp(mu))
        l = plt.plot(bins, y, 'r--', linewidth=1)
    plt.ylabel(ylabel)
    plt.xlabel(attribute_names)
    plt.title(title)
    plt.grid(True)
    plt.show()
def prob_gate(pointPOS, gatePOS):
    '''Compute the probability of a point according to a lognormal distribution based on the gate.'''
    d = math.sqrt((gatePOS[0] - pointPOS[0])**2 + (gatePOS[1] - pointPOS[1])**2)
    mu = (2 * math.log(4.700) + math.log(3.877)) / 3.0
    delta = math.sqrt(2.0 / 3 * (math.log(4.7) - math.log(3.877)))
    # scipy's lognorm takes the log-space sigma as the shape and exp(mu) as the scale (loc stays 0)
    return d, lognorm.pdf(d, s=delta, scale=math.exp(mu))
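# A minimal self-contained check (not part of the original snippets) of the scipy
# parameterization used above: for a lognormal with log-mean mu and log-sd sigma,
# lognorm.pdf(x, s=sigma, scale=exp(mu)) matches the closed-form density.
import numpy as np
from scipy.stats import lognorm

def lognormal_density(x, mu, sigma):
    """Closed-form lognormal pdf with log-space parameters mu and sigma."""
    return np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi))

mu, sigma = 1.4, 0.3  # hypothetical parameters chosen only for illustration
x = np.linspace(0.5, 15, 200)
assert np.allclose(lognorm.pdf(x, s=sigma, scale=np.exp(mu)), lognormal_density(x, mu, sigma))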
def createHisto(histoData1, histoData2, imageName1, imageName2): """ Creates a diagram showing two histograms. """ fig = plt.figure() plt.subplot(111) data1 = histoData1['data'] data2 = histoData2['data'] n, bins, patches = plt.hist(data1, _NUMBER_OF_HISTO_BARS, range=(0, data1.max()), normed=0, \ weights=np.zeros_like(data1)+1./data1.size, facecolor=_COLOR_FIRST_DATA[0], alpha=0.4, label=imageName1) n2, bins2, patches = plt.hist(data2, _NUMBER_OF_HISTO_BARS, range=(0, data2.max()), normed=0, \ weights=np.zeros_like(data2)+1./data2.size, facecolor=_COLOR_SECOND_DATA[0], alpha=0.4, label=imageName2) # 'best fit' line shape, loc, scale = lognorm.fit(data1, floc=0) # Fit a curve to the variates maximum = data1.max() if data1.max()>data2.max() else data2.max() x = np.linspace(0, 1.2 * maximum, num=500) # scaling binlength = bins[1] - bins[0] alpha = factorize(n, binlength) shape2, loc2, scale2 = lognorm.fit(data2, floc=0) # Fit a curve to the variates # scaling binlength2 = bins2[1] - bins2[0] alpha2 = factorize(n2, binlength2) # plot functions simplefilter("ignore", RuntimeWarning) # avoid warning in this method # plt.plot(bins[1:], n, 'b^', alpha=0.5) plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), _COLOR_FIRST_DATA[1]+'--') # plt.plot(bins2[1:], n2, 'g^', alpha=0.5) plt.plot(x, alpha2 * (lognorm.pdf(x, shape2, loc=0, scale=scale2)), _COLOR_SECOND_DATA[1]+'--') axe = plt.axis() newaxe =(axe[0], 1.2 * maximum, axe[2], axe[3]) plt.axis(newaxe) plt.title(histoData1['title']) plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$') plt.xlabel(histoData1['xlabel']) simplefilter("default", RuntimeWarning) # position the legend plt.legend(loc=0, frameon=0) plt.minorticks_on() return fig
def galoc_fun(x): cs = np.arange(-20,(50+step),step) mu0 = (1-(mcheck+Mshift)/mmm)*((pa[0]*(mcheck-(pa[5]-Mshift))**2 \ + pa[1]*(mcheck-(pa[5]-Mshift)) + pa[2])**pa[4] + pa[3]) rho0 = VarPars.x[0]*(10**5)*((10)**(VarPars.x[1]*(mcheck+Mshift-11))) n = len(cs) res = np.zeros((n)) for i in xrange(0,n): res[i] = lognorm.pdf(x, np.sqrt(np.log(1 + rho0/(mu0**2.0))), 0,\ np.exp(np.log(mu0)-( (1/2.0)*np.log( 1 + rho0/(mu0**2.0) ) )))*dstar(cs[i]-x,starparas) return res
def createHisto(A, title='', xlabel='', unit=''): """ Generates one histogram of the given data. """ fig = plt.figure() ax = plt.subplot(111) n, bins, patches = plt.hist(A, _NUMBER_OF_HISTO_BARS, range=(0, A.max()), normed=0, \ weights=np.zeros_like(A)+1./A.size, facecolor='cyan', alpha=0.4, label=' ') # set min and max values to return values = {} values['min'] = A.min() values['minrf'] = n[np.nonzero(n)][0] values['max'] = A.max() values['maxrf'] = n[-1] numbers = title+"\nx: "+str(bins[1:])+"\ny: "+str(n)+"\n\n" # 'best fit' line shape, loc, scale = lognorm.fit(A, floc=0) # Fit a curve to the variates x = np.linspace(0, 1.2 * A.max(), num=500) # scaling binlength = bins[1] - bins[0] alpha = factorize(n, binlength) # plot functions simplefilter("ignore", RuntimeWarning) # avoid warning in this method plt.plot(bins[1:], n, 'c^', alpha=0.5, label='Distribution') plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), 'c--', label='Fit') axe = plt.axis() newaxe =(axe[0], 1.2 * A.max(), axe[2], axe[3]) plt.axis(newaxe) plt.title(title) plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$') plt.xlabel(xlabel) simplefilter("default", RuntimeWarning) # position the legend handles, labels = ax.get_legend_handles_labels() indexL3 = labels.index(' ') labelsL3 = [labels[indexL3]] handlesL3 = [handles[indexL3]] del labels[indexL3] del handles[indexL3] l1 = plt.legend(handlesL3, labelsL3, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0) plt.legend(handles, labels, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0) plt.gca().add_artist(l1) currentaxis = fig.gca() legendText = '$\mathrm{\mathsf{\mu =}}$ %4.2f '+unit+'\n$\mathrm{\mathsf{\sigma =}}$ %4.2f '+unit plt.text(0.96, 0.86, legendText % (scale, (shape * scale)), horizontalalignment='right', \ verticalalignment='top', transform=currentaxis.transAxes) plt.minorticks_on() return fig, values, numbers
def FitPrice(data):
    priceData = data[:, 6]
    priceData = priceData[~sp.isnan(priceData)]
    shape, loc, scale = lognorm.fit(priceData, loc=0)
    x = np.linspace(0, 100, 100)
    p = lognorm.pdf(x, shape, loc, scale)
    # walk up the fitted pdf and stop at the first point where it turns down (the mode)
    maxIndex = 0
    for i in range(0, len(p)):
        if p[i] >= p[maxIndex]:
            maxIndex = i
        else:
            break
    return x[maxIndex]
def _wrapper_baseline(alpha, shape, x): """ This private function constructs the integrand for the application of numerical integration strategies. """ # Guard interface. assert basic_checks('_wrapper_baseline', 'in', alpha, shape, x) # Evaluate utility and weigh by probability. rslt = baseline_utility(x, alpha) * lognorm.pdf(x, shape) # Check result. assert basic_checks('_wrapper_baseline', 'out', rslt) # Finishing return rslt
def hist(x, weights=None, bins=10, distname='normal', color='b', label='pdf', filename=None):
    # expand the weighted samples into the full data vector
    z = x
    if weights is not None:
        z = np.zeros(sum(weights))
        j = 0
        for i in range(weights.size):
            for k in range(j, j + weights[i]):
                z[j] = x[i]
                j += 1
    # histogram
    hist, bins = np.histogram(x, bins=bins, density=True, weights=weights)
    # fit distribution
    if distname == 'normal':
        (mu, sigma) = norm.fit(z)
        pdf = lambda x: norm.pdf(x, mu, sigma)
    elif distname == 'lognormal':
        sigma, loc, scale = lognorm.fit(z, floc=0)
        mu = np.log(scale)
        pdf = lambda x: lognorm.pdf(x, sigma, loc, scale=scale)
    elif distname is not None:
        raise Exception('Unsupported distribution name ' + distname)
    # plot distribution
    if distname is not None:
        x = np.linspace(bins[0], bins[-1], 100)
        y = pdf(x)
        label = 'm=%2.1f, s=%2.1f [%s]' % (mu, sigma, label)
        plt.plot(x, y, linewidth=3, label=label, alpha=0.7, color=color)
    # plot histogram
    c = (bins[:-1] + bins[1:]) / 2  # bin centers
    plt.plot(c, hist, marker='s', alpha=0.7, markersize=8, linestyle='None', color=color)
    # format plot
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.ylabel('PDF', fontsize=16)
    if filename is not None:
        print('Saving figure ' + filename)
        plt.savefig(filename, bbox_inches='tight')
def make_csd(shape, scale, npart, show_plot=False): """Create cell size distribution and save it to file.""" if shape == 0: rads = [scale + 0 * x for x in range(npart)] else: rads = lognorm.rvs(shape, scale=scale, size=npart) with open('diameters.txt', 'w') as fout: for rad in rads: fout.write('{0}\n'.format(rad)) if shape == 0: xpos = linspace(scale / 2, scale * 2, 100) else: xpos = linspace(lognorm.ppf(0.01, shape, scale=scale), lognorm.ppf(0.99, shape, scale=scale), 100) plt.plot(xpos, lognorm.pdf(xpos, shape, scale=scale)) plt.hist(rads, normed=True) plt.savefig('packing_histogram.png') plt.savefig('packing_histogram.pdf') if show_plot: plt.show()
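# A short usage sketch for make_csd above (not part of the original script); the shape and
# scale values are hypothetical. shape == 0 falls back to monodisperse radii, anything else
# samples a lognormal cell size distribution and writes diameters.txt plus the histogram plots.
if __name__ == '__main__':
    make_csd(shape=0.4, scale=2.0, npart=500, show_plot=False)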
def ddPDF(pts, mu, sigma, distribuition, outlier=0, data=0, n=10, seed=None): import numpy as np from scipy.interpolate import interp1d from distAnalyze import ddpdf, mediaMovel from scipy.stats import norm, lognorm from someFunctions import ash eps = 5e-5 ngrid = int(1e6) #ddy = lambda x,u,s: abs(-(s**2-u**2+2*u*x-x**2)/(s**5*sqrt(2*pi))*np.exp(-0.5*((u-x)/s)**2)) if distribuition == 'normal': outlier_inf = outlier_sup = outlier if not data: inf, sup = norm.interval(0.9999, loc=mu, scale=sigma) x = np.linspace(inf - outlier_inf, sup + outlier_sup, ngrid) y = ddpdf(x, mu, sigma, distribuition) else: np.random.set_state(seed) d = np.random.normal(mu, sigma, data) inf, sup = min(d) - outlier_inf, max(d) + outlier_sup #y,x = np.histogram(d,bins = 'fd',normed = True) #x = np.mean(np.array([x[:-1],x[1:]]),0) x, y = ash(d) y = abs(np.diff(y, 2)) x = x[:-2] + np.diff(x)[0] #y = abs(np.diff(mediaMovel(y,n),2)) #x = x[:-2]+np.diff(x)[0] y = y / (np.diff(x)[0] * sum(y)) elif distribuition == 'lognormal': outlier_inf = 0 outlier_sup = outlier inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=np.exp(mu)) inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu)) inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu)) if not data: x = np.linspace(inf - outlier_inf, sup + outlier_sup, ngrid) y = ddpdf(x, mu, sigma, distribuition) else: np.random.set_state(seed) d = np.random.lognormal(mu, sigma, data) #inf,sup = min(d)-outlier_inf,max(d)+outlier_sup # y,x = np.histogram(d,bins = 'fd',normed = True) #x = np.mean(np.array([x[:-1],x[1:]]),0) x, y = ash(d) y = y[x < sup] x = x[x < sup] y = abs(np.diff(y, 2)) #y = abs(np.diff(mediaMovel(y,n),2)) x = x[:-2] + np.diff(x)[0] y = y / (np.diff(x)[0] * sum(y)) #cdf = np.sum(np.tri(len(x))*y,1) cdf = np.cumsum(y) # ============================================================================= # for i in range(1,ngrid): # cdf.append(y[i]+cdf[i-1]) cdf = cdf / max(cdf) # # ============================================================================= interp = interp1d(cdf, x, fill_value='extrapolate') Y = np.linspace(eps, 1 - eps, pts) X = interp(Y) return X, Y
def diffArea(nest, outlier=0, data=0, kinds='all', axis='probability', ROI=20, mu=0,
             sigma=1, weight=False, interpolator='linear', distribuition='normal',
             seed=None, plot=True):
    """
    Return the error area between an analytic function and an estimated
    discretization of a distribution.

    Parameters
    ----------
    nest: int
        The number of estimation points.
    outlier: int, optional
        The location of an outlier event, e.g. outlier = 50 will put an event
        at -50 and +50 if mu = 0. Default is 0.
    data: int, optional
        If data > 0, random data are used instead of the analytic values.
        Default is 0.
    kinds: str or array, optional
        Specifies the kind of discretization to analyze
        ('Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2', 'all'). Default is 'all'.
    axis: str, optional
        Specifies the x axis to analyze
        ('probability', 'derivative', '2nd_derivative', 'X'). Default is 'probability'.
    ROI: int, optional
        Specifies the number of regions of interest. Default is 20.
    mu: int, optional
        Specifies the mean of the distribution. Default is 0.
    sigma: int, optional
        Specifies the standard deviation of the distribution. Default is 1.
    weight: bool, optional
        If True, each ROI is given a different weight in the analysis.
        Default is False.
    interpolator: str, optional
        Specifies the kind of interpolation as a string ('linear', 'nearest',
        'zero', 'slinear', 'quadratic', 'cubic', where 'zero', 'slinear',
        'quadratic' and 'cubic' refer to a spline interpolation of zeroth,
        first, second or third order) or as an integer specifying the order of
        the spline interpolator to use. Default is 'linear'.
    distribuition: str, optional
        Selects the distribution to analyze ('normal', 'lognormal').
        Default is 'normal'.
    seed: optional
        Random generator state passed to np.random.set_state when data > 0.
    plot: bool, optional
        If True, a plot of the analysis is drawn. Default is True.

    Returns
    -------
    a, [b, c]: float and ndarrays of float.
        area, [probROIord, areaROIord]
        The sum of the total error area together with the 'x' and 'y' values.
""" import numpy as np from scipy.stats import norm, lognorm from scipy.interpolate import interp1d from numpy import exp import matplotlib.pyplot as plt from statsmodels.distributions import ECDF from distAnalyze import pdf, dpdf, ddpdf, PDF, dPDF, ddPDF area = [] n = [] data = int(data) if distribuition == 'normal': outlier_inf = outlier_sup = outlier elif distribuition == 'lognormal': outlier_inf = 0 outlier_sup = outlier ngrid = int(1e6) truth = pdf if axis == 'probability': truth1 = pdf elif axis == 'derivative': truth1 = dpdf elif axis == '2nd_derivative': truth1 = ddpdf elif axis == 'X': truth1 = lambda x, mu, sigma, distribuition: x #else: return 'No valid axis' probROIord = {} areaROIord = {} div = {} if seed is not None: np.random.set_state(seed) if data: if distribuition == 'normal': d = np.random.normal(mu, sigma, data) elif distribuition == 'lognormal': d = np.random.lognormal(mu, sigma, data) if kinds == 'all': kinds = ['Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2'] elif type(kinds) == str: kinds = [kinds] for kind in kinds: if distribuition == 'normal': inf, sup = norm.interval(0.9999, loc=mu, scale=sigma) elif distribuition == 'lognormal': inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=exp(mu)) inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu)) inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu)) xgrid = np.linspace(inf, sup, ngrid) xgridROI = xgrid.reshape([ROI, ngrid // ROI]) dx = np.diff(xgrid)[0] if kind == 'Linspace': if not data: xest = np.linspace(inf - outlier_inf, sup + outlier_sup, nest) else: if distribuition == 'normal': #d = np.random.normal(loc = mu, scale = sigma, size = data) inf, sup = min(d), max(d) xest = np.linspace(inf - outlier_inf, sup + outlier_sup, nest) elif distribuition == 'lognormal': #d = np.random.lognormal(mean = mu, sigma = sigma, size = data) inf, sup = min(d), max(d) xest = np.linspace(inf - outlier_inf, sup + outlier_sup, nest) yest = pdf(xest, mu, sigma, distribuition) elif kind == 'CDFm': eps = 5e-5 yest = np.linspace(0 + eps, 1 - eps, nest) if distribuition == 'normal': if not data: xest = norm.ppf(yest, loc=mu, scale=sigma) yest = pdf(xest, mu, sigma, distribuition) else: #d = np.random.normal(loc = mu, scale = sigma, size = data) ecdf = ECDF(d) inf, sup = min(d), max(d) xest = np.linspace(inf, sup, data) yest = ecdf(xest) interp = interp1d(yest, xest, fill_value='extrapolate', kind='nearest') yest = np.linspace(eps, 1 - eps, nest) xest = interp(yest) elif distribuition == 'lognormal': if not data: xest = lognorm.ppf(yest, sigma, loc=0, scale=exp(mu)) yest = pdf(xest, mu, sigma, distribuition) else: #d = np.random.lognormal(mean = mu, sigma = sigma, size = data) ecdf = ECDF(d) inf, sup = min(d), max(d) xest = np.linspace(inf, sup, nest) yest = ecdf(xest) interp = interp1d(yest, xest, fill_value='extrapolate', kind='nearest') yest = np.linspace(eps, 1 - eps, nest) xest = interp(yest) elif kind == 'PDFm': xest, yest = PDF(nest, mu, sigma, distribuition, outlier, data, seed) elif kind == 'iPDF1': xest, yest = dPDF(nest, mu, sigma, distribuition, outlier, data, 10, seed) elif kind == 'iPDF2': xest, yest = ddPDF(nest, mu, sigma, distribuition, outlier, data, 10, seed) YY = pdf(xest, mu, sigma, distribuition) fest = interp1d(xest, YY, kind=interpolator, bounds_error=False, fill_value=(YY[0], YY[-1])) #fest = lambda x: np.concatenate([fest1(x)[fest1(x) != -1],np.ones(len(fest1(x)[fest1(x) == -1]))*fest1(x)[fest1(x) != -1][-1]]) yestGrid = [] ytruthGrid = [] ytruthGrid2 = [] divi = [] for i in range(ROI): 
yestGrid.append([fest(xgridROI[i])]) ytruthGrid.append([truth(xgridROI[i], mu, sigma, distribuition)]) ytruthGrid2.append([truth1(xgridROI[i], mu, sigma, distribuition)]) divi.append( len( np.intersect1d( np.where(xest >= min(xgridROI[i]))[0], np.where(xest < max(xgridROI[i]))[0]))) diff2 = np.concatenate( abs((np.array(yestGrid) - np.array(ytruthGrid)) * dx)) #diff2[np.isnan(diff2)] = 0 areaROI = np.sum(diff2, 1) divi = np.array(divi) divi[divi == 0] = 1 try: probROI = np.mean(np.sum(ytruthGrid2, 1), 1) except: probROI = np.mean(ytruthGrid2, 1) probROIord[kind] = np.sort(probROI) index = np.argsort(probROI) areaROIord[kind] = areaROI[index] #deletes = ~np.isnan(areaROIord[kind]) #areaROIord[kind] = areaROIord[kind][deletes] #probROIord[kind] = probROIord[kind][deletes] area = np.append(area, np.sum(areaROIord[kind])) n = np.append(n, len(probROIord[kind])) div[kind] = divi[index] if plot: if weight: plt.logy(probROIord[kind], areaROIord[kind] * div[kind], '-o', label=kind, ms=3) else: plt.plot(probROIord[kind], areaROIord[kind], '-o', label=kind, ms=3) plt.yscale('log') plt.xlabel(axis) plt.ylabel('Error') plt.legend() #plt.title('%s - Pontos = %d, div = %s - %s' %(j,nest, divs,interpolator)) return area, [probROIord, areaROIord]
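# A minimal usage sketch for diffArea (not part of the original module); the argument values
# are hypothetical and assume the helpers imported inside diffArea (PDF, dPDF, ddPDF, ...) are available.
total_area, (prob_by_kind, area_by_kind) = diffArea(nest=50, outlier=0, data=0,
                                                    kinds='Linspace', axis='probability',
                                                    ROI=20, mu=0, sigma=1,
                                                    distribuition='normal', plot=False)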
plt.show() #scipy.stats.lognorm.fit import numpy as np from scipy.stats import lognorm import matplotlib.pyplot as plt mu = 0 sigma = 1 samplenumbers1 = np.random.lognormal(mu,sigma,1000) samplenumbers2 = lognorm.rvs(sigma,0,np.exp(mu),1000) shape, loc, scale = lognorm.fit(samplenumbers1, floc=0) fit_mu = np.log(scale) fit_sigma = shape count, bins, ignored = plt.hist(samplenumbers1, 100, density=True, align='mid') x = np.linspace(min(bins), max(bins), 10000) pdf1 = ((np.exp(-(np.log(x) - fit_mu)**2 / (2 * fit_sigma**2)) / (x * fit_sigma * np.sqrt(2 * np.pi)))) pdf2 = lognorm.pdf(x,shape,0,scale) plt.plot(x, pdf2, linewidth=2, color='r') plt.axis('tight') plt.show()
x = np.linspace(2, 22, 4000) dists = np.array([3.958,3.685,3.897,3.317]) al,loc,beta=lognorm.fit(sig_vals_r) print al, loc, beta # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606') print lognorm.cdf(dists, al, loc=loc, scale=beta) bins, edges = np.histogram(sig_vals_r, bins=400, range=[2,22], normed=True) centers = (edges[:-1] + edges[1:])/2. plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples') # x = np.linspace(2, 22, 4000) # dists = np.array([3.958,3.685,3.897,3.317]) # al,loc,beta=lognorm.fit(valsLP) # print al, loc, beta plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal distribution') print lognorm.cdf(dists, al, loc=loc, scale=beta) ax = plt.subplot(111) # plt.plot([3.958,3.958],[-1.0,2.0],'k-', lw=5, alpha=1.0, label='best AGC198606 detection') # plt.plot([10.733,10.733],[-1.0,2.0],'k-', lw=5, alpha=0.5, label='Leo P detection at 1.74 Mpc') # plt.plot([3.897,3.897],[-1.0,2.0],'k-', lw=5, alpha=0.6, label='d=417 kpc') # plt.plot([3.317,3.317],[-1.0,2.0],'k-', lw=5, alpha=0.4, label='d=427 kpc') plt.ylim(0,1.1) plt.xlim(2,7.5) plt.xlabel('$\sigma$ above local mean') plt.ylabel('$P(\sigma = X)$') plt.legend(loc='best', frameon=False) ax.set_aspect(2) # plt.show() plt.savefig('randdist.eps')
def create_animals(env_dim = 10000, animal_sp = 5, individuals = 30, stage = 0, file_name = "", inter_mov = 7.1e-5, sl_mov = -0.002): ''' This function populates a continuous landscape of dimension env_dim (m) with "individuals" animals of "animal_sp" different species. ''' # Animal agents class Animals: idd = np.arange(1, individuals+1) coords = np.array([[], []], dtype = float) sp = np.zeros((individuals,), dtype = int) seed_n = [] seed_t = [] travel = np.zeros((individuals,), dtype = float) perch = np.zeros((individuals,), dtype = float) p_interact = np.repeat(-2, individuals) # aqui esse array nao e usado, mas ja esta pronto para considerar a ultima planta visitada por cada animal animals = Animals() an_sp = animal_sp mov = np.repeat(Parms.mov_dist, individuals) time = np.repeat(Parms.daily_time, individuals) # Animal location x = env_dim*np.random.random(individuals) y = env_dim*np.random.random(individuals) animals.coords = np.append(x, y).reshape(2, individuals).transpose() # Defining plant species if Parms.a_abund_field: names, prop, mat = read_abund(file_name, stage) order = np.argsort(prop)[::-1] if Parms.frug: temp = mat[:,1]/100 temp = temp[order] if not Parms.mov_dist: mass = mat[:,0] mov_sp = inter_mov*np.exp(sl_mov*mass) mov_sp = mov_sp[order] names = names[order] prop = np.sort(prop)[::-1] an_sp = len(prop) else: names = np.arange(1, an_sp+1).astype('S10') spid = np.arange(1, an_sp+1) # species id s = 5 # parameter of the lognormal distribution prop = lognorm.pdf(spid, s, loc=0, scale=1) # proportions come from a discretized log normal distribution, with parameters (mulog = 0, sdlog = 2) prop = prop/sum(prop) # proportion of individuals of each species nind = np.rint(prop*individuals).astype(int) # number of individuals of each species if Parms.a_remove_absent: names = names[np.where(nind != 0)] if Parms.frug: temp = temp[np.where(nind != 0)] if not Parms.mov_dist: mov = mov[np.where(nind != 0)] nind = nind[np.where(nind != 0)] an_sp = len(nind) while nind.sum() < individuals: x = np.random.choice(np.arange(an_sp)) nind[x] = nind[x] + 1 while nind.sum() > individuals: x = np.random.choice(np.arange(an_sp)) if nind[x] > 1: # I am not removing any species... meybe we have to think about that! nind[x] = nind[x] - 1 prop = nind.astype(float)/nind.sum() # proportion of individuals of each species #propacum = prop.cumsum(0) # Cumulative probability for each species #sp = animal_sp-1 for ind in np.arange(individuals): #if nind[sp] <= 0: # sp = sp-1 #animals.sp[ind] = sp+1 #nind[sp] = nind[sp] - 1 animals.seed_n.append([]) animals.seed_t.append([]) i = 0 sp = 0 while sp < an_sp: if(nind[sp] > 0): animals.sp[i:(i+nind[sp])] = sp+1 if Parms.frug: time[i:(i+nind[sp])] *= temp[sp] if not Parms.mov_dist: mov[i:(i+nind[sp])] = mov_sp[sp] i = i+nind[sp] sp = sp+1 return animals.idd, animals.sp, animals.coords, time, animals.seed_n, animals.seed_t, animals.travel, animals.perch, animals.p_interact, mov, names
def pdf_LN(X, mu, sigma): ''' lognormal pdf with actual miu and sigma ''' mu_tmp, sigma_tmp = log_params(mu, sigma) return lognorm.pdf(X, s=sigma_tmp, scale=np.exp(mu_tmp))
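# pdf_LN above depends on a log_params helper that is not shown in this snippet. Below is a
# minimal sketch of the conversion it presumably performs, given a hypothetical name here to
# avoid clobbering the project's own helper: map the arithmetic mean/std of a lognormal
# variable to its log-space (mu, sigma).
import numpy as np

def log_params_sketch(mu, sigma):
    """Convert arithmetic mean/std of a lognormal variable to log-space (mu, sigma)."""
    var_ratio = 1.0 + (sigma / mu)**2          # 1 + Var[X]/E[X]^2
    mu_tmp = np.log(mu / np.sqrt(var_ratio))   # = ln(mu^2 / sqrt(mu^2 + sigma^2))
    sigma_tmp = np.sqrt(np.log(var_ratio))
    return mu_tmp, sigma_tmp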
pl.figure() z2s_all_surfaceClasses = [] param_all_surfaceClasses = [[] for _ in range(len(surfaceClasses))] z2s_all_surfaceClasses = Z2_1cmx1cm_surfaceClasses x = np.linspace(0.0,0.5,100) for i in range(len(surfaceClasses)): pl.hist( Z2_1cmx1cm_surfaceClasses[i] , normed=1, bins=40, range=[0.0,0.5], alpha=.3) param_all_surfaceClasses[i] = lognorm.fit( z2s_all_surfaceClasses[i] ) #print param_all_surfaceClasses for i in range(len(surfaceClasses)): pdf_surfaceClasses = lognorm.pdf(x,param_all_surfaceClasses[i][0],param_all_surfaceClasses[i][1],param_all_surfaceClasses[i][2]) pl.plot(x, pdf_surfaceClasses, c=scColor[i]) pl.xlabel("Z2") pl.ylabel("Probability Distribution Function") pl.savefig( str(FIG_FILE_PATH+"Z2_NatVsArt_pdf_hist.eps") ) pl.savefig( str(FIG_FILE_PATH+"Z2_NatVsArt_pdf_hist.pdf") ) pl.show() exit()
def create_environment(env_dim = 10000, plant_sp = 3, individuals = 1500, dist_corr = 200.0, rho = 0.0, stage = 0, file_name = ""): ''' This function populates a continuous landscape of dimension env_dim (m) with "individuals" plants of "plant_sp" different species. dist_corr and rho sets the autocorralation in the position of plants. We still need: - aggregate species in clusters? ''' # Plant agents class Plants: idd = np.arange(1, individuals+1) coords = np.array([[], []], dtype = float) sp = np.zeros((individuals,), dtype=int) fruits = np.array([], dtype = int) plants = Plants() pl_sp = plant_sp #np.random.seed(3) # Plant location x = env_dim*np.random.random(individuals) y = env_dim*np.random.random(individuals) plants.coords = np.append(x, y).reshape(2, individuals).transpose() # Number of fruits plants.fruits = gamma.rvs(2, scale = 8, size = individuals).astype(int) np.putmask(plants.fruits, plants.fruits > 100, 100) # Defining plant species if Parms.p_abund_field: names, prop, mat = read_abund(file_name, stage) order = np.argsort(prop)[::-1] names = names[order] prop = np.sort(prop)[::-1] else: names = np.arange(1, pl_sp+1).astype('S10') spid = np.arange(1, pl_sp+1) # species id s = 2 # parameter of the lognormal distribution prop = lognorm.pdf(spid, s, loc=0, scale=1) # proportions come from a discretized log normal distribution, with parameters (mulog = 0, sdlog = 2) prop = prop/sum(prop) # proportion of individuals of each species nind = np.rint(prop*individuals).astype(int) # number of individuals of each species # removing sps not present if Parms.p_remove_absent: names = names[np.where(nind != 0)] nind = nind[np.where(nind != 0)] pl_sp = len(nind) while nind.sum() < individuals: x = np.random.choice(np.arange(pl_sp)) nind[x] = nind[x] + 1 while nind.sum() > individuals: x = np.random.choice(np.arange(pl_sp)) nind[x] = nind[x] - 1 prop = nind.astype(float)/nind.sum() # proportion of individuals of each species propacum = prop.cumsum(0) # Cumulative probability for each species # First plant x = np.where( plants.sp == 0 )[0] index_ind = np.random.choice(x) nrand = np.random.uniform() index_sp = np.amin(np.where( nrand < propacum )) plants.sp[index_ind] = index_sp+1 # Refreshing nind[index_sp] = nind[index_sp]-1 prop = nind.astype(float)/nind.sum() if prop[index_sp] > 0.0: prop_aux = np.delete(prop, index_sp) prop_aux = np.array(map(lambda x: x * (1 - rho), prop_aux)) prop_aux = np.insert(prop_aux, index_sp, prop[index_sp] + (1 - prop[index_sp]) * rho) propacum = prop_aux.cumsum(0) else: propacum = prop.cumsum(0) # Other plants #while np.any(plants.sp == 0): while nind.sum() > 0: dists = np.array(map(distance, np.repeat(plants.coords[index_ind].reshape((1,2)), individuals, axis=0), plants.coords )) dist_index = np.where( (dists < dist_corr) * (plants.sp == 0) )[0] if dist_index.size != 0: index_ind = np.random.choice(dist_index) else: #dists_sort = np.sort(dists) #for i in np.arange(len(dists)): # indice = np.where( dists_sort[i] == dists ) # if plants.sp[indice] == 0: # index_ind = indice # break; x = np.where( plants.sp == 0 )[0] index_ind = np.random.choice(x) nrand = np.random.uniform() index_sp = np.amin(np.where( nrand < propacum )) plants.sp[index_ind] = index_sp+1 # Refreshing nind[index_sp] = nind[index_sp]-1 prop = nind.astype(float)/nind.sum() print nind.sum() if prop[index_sp] > 0.0: prop_aux = np.delete(prop, index_sp) prop_aux = np.array(map(lambda x: x * (1 - rho), prop_aux)) prop_aux = np.insert(prop_aux, index_sp, prop[index_sp] + (1 - prop[index_sp]) * rho) 
propacum = prop_aux.cumsum(0) else: propacum = prop.cumsum(0) return plants.idd, plants.sp, plants.fruits, plants.coords, names
from scipy.stats import lognorm import math from scipy.interpolate import UnivariateSpline import sys data = sp.genfromtxt(sys.argv[1], delimiter=",") freq = {} priceData = data[:, 4] priceData = priceData[~sp.isnan(priceData)] shape, loc, scale = lognorm.fit(priceData,loc = 0) plt.hist(priceData, bins=100, normed=True, alpha=0.6, color='g') xmin, xmax = plt.xlim() x = np.linspace(xmin, xmax, 100) p = lognorm.pdf(x, shape, loc, scale) print(p) print(x) maxIndex = 0 for i in range(0, len(p)): if p[i] >= p[maxIndex]: maxIndex = i else: break; # if the plot goes down, stop searching. maxX = x[maxIndex] plt.plot(x, p, 'k', linewidth=2) plt.title("Max x = " + str(maxX)) plt.show()
def P(mu,data_obsDict,PrettyHugeDict,lnN_matrix,datacards_dict,processes_dict,stats,**kwargs): ff=1 I=0 c=0 plus=0 for datacard in PrettyHugeDict.iterkeys(): # ACHTUNG: "for k" and "for processes" have been interchanged... for k in xrange(0,len(data_obsDict[datacard])):# is there always a process "data_obs" which gives the representative bin number? if ff < 1e-100: ff*=1e+100 plus+=1 #print "---------------------------------" #print k, ff sk=0 bk=0 for process in PrettyHugeDict[datacard].iterkeys(): bin_k=PrettyHugeDict[datacard][process][stats[datacards_dict[datacard]][processes_dict[datacard][process]]][k] # get the nominal value: mu_k=bin_k[1] # get the statistical sigma: # are the Ups and Downs symmetric around the nominal value? - First look: yes... # bei Wln sind die Ups und Downs auf den Bins durcheinandergeworfen! Der Abstand scheint aber wieder zu stimmen --- abs() sigma_k=abs(bin_k[1]-bin_k[0]) # beim ersten Durchlauf die lnN-priors mit rausholen -> c scale_factors=1 for scale in lnN_matrix.iterkeys(): # lnN-priors dranmultiplizieren if c < len(lnN_matrix): ff*=lognorm.pdf(kwargs[scale],1) c+=1 # scale_factors generieren xxx=lnN_matrix[scale][datacards_dict[datacard]][processes_dict[datacard][process]] if xxx != "-": scale_factors*=power(kwargs[scale],(float(xxx) - 1.0)) # ------------------------------------------------------------ GBC_sum=0 for syst in PrettyHugeDict[datacard][process]: if not "stat" in syst: # GBC(IntVars[syst]) GBC_sum+=(PrettyHugeDict[datacard][process][syst][k][kwargs[syst]] - mu_k) # sum of the backgrounds (required for the likelihood) kwarg=kwargs[datacard+"-"+process+"-"+str(k+1)] if kwarg == "-": if process != ("ZH" or "WH"): bk+=scale_factors*(mu_k + GBC_sum) else: sk+=scale_factors*(mu_k + GBC_sum) else: # if kwargs[datacard+"-"+process+"-"+str(k+1)] != "-": --- # if sigma_k > 0.1: ff*=norm.pdf(kwarg,scale_factors*(mu_k + GBC_sum),sigma_k) if (norm.pdf(kwarg,scale_factors*(mu_k + GBC_sum),sigma_k) == 0.0): print datacard, process, k, "GAUSS: ", norm.pdf(kwargs[datacard+"-"+process+"-"+str(k+1)],scale_factors*(mu_k + GBC_sum),sigma_k),kwargs[datacard+"-"+process+"-"+str(k+1)],scale_factors*(mu_k + GBC_sum),sigma_k if process != ("ZH" or "WH"): bk+=kwarg else: sk+=kwarg #print process, ff # ------------------------------------------------------------ #sk und bk verarbeiten #print "Poisson", data_obsDict[datacard][k],sk,bk, ExtendedPoisson(data_obsDict[datacard][k],mu,sk,bk) #print "---------------------------------" ff*=ExtendedPoisson(data_obsDict[datacard][k],mu,sk,bk) # Jeffreys-Prior vorbereiten I+=Fisher(mu,sk,bk) # Jeffreys-Prior # I kann stellenweise negativ werden - sollte aber keine Rolle spielen, weil der Poissonian dort eh verschwindet... I=sqrt(abs(I)) # "plus" seems to range from 13 to 17 - take 15 as the default value and compute the relative exponents from there: #plus=100*(15-plus) #ff*=10**plus #print plus, ff # Feddige Foarmel... return I*ff
% ( height[range_level], std(reflCompressed), skew(reflCompressed) ) ) #pdb.set_trace() # Plot histogram of copolar radar reflectivity plt.subplot(212) n, bins, patches = plt.hist(reflCompressed, 50, normed=True, histtype='stepfilled') plt.setp(patches, 'facecolor', 'g', 'alpha', 0.75) # Overplot best-fit truncated normal truncNormCurve = plt.plot(reflRange, norm.pdf(reflRange,loc=mu,scale=sigma)/(1.0-norm.cdf((minRefl-mu)/sigma)), label="Truncated normal") # Overplot best-fit normal normCurve = plt.plot( reflRange, norm.pdf(reflRange,loc=truncMean,scale=sqrt(truncVarnce)) , label="Normal" ) # Overplot best-fit lognormal plt.plot( reflRange , lognorm.pdf( reflRange - minRefl, sqrt(sigma2LogN), loc=0, scale=expMuLogN ) , label="Lognormal" ) plt.xlabel('Copolar radar reflectivity') plt.ylabel('Probability') plt.legend() #plt.show() plt.draw() #plt.close() #exit
def distfit(n,dists,title,ra,dec,fwhm, dm): import numpy as np import matplotlib.pyplot as plt # from scipy.optimize import curve_fit from scipy.stats import lognorm from scipy import ndimage # n = 279 bins = 165 width = 22 # fwhm = 2.0 sig = ((bins/width)*fwhm)/2.355 valsLP = [] for i in range(25000) : random_ra = ra*np.random.random_sample((n,)) random_dec = dec*np.random.random_sample((n,)) random_xy = zip(random_ra,random_dec) grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]]) hist_points_r = zip(xedges_r,yedges_r) grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0) S_r = np.array(grid_gaus_r*0) grid_mean_r = np.mean(grid_gaus_r) grid_sigma_r = np.std(grid_gaus_r) S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape) valsLP.append(S_r[x_cent_r][y_cent_r]) # valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True) # vals = np.loadtxt('values.txt', usecols=(0,), unpack=True) # bins, edges = np.histogram(vals, bins=400, range=[2,22], normed=True) # centers = (edges[:-1] + edges[1:])/2. # plt.scatter(centers, bins, edgecolors='none') x = np.linspace(2, 22, 4000) # al,loc,beta=lognorm.fit(vals) # print al, loc, beta # # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606') # print lognorm.cdf(dists, al, loc=loc, scale=beta) bins, edges = np.histogram(valsLP, bins=400, range=[2,22], normed=True) centers = (edges[:-1] + edges[1:])/2. # x = np.linspace(2, 22, 4000) # dists = np.array([3.958,3.685,3.897,3.317]) al,loc,beta=lognorm.fit(valsLP) # print al, loc, beta plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=2, alpha=0.6, label='lognormal distribution') print 'Significance of detection:','{0:6.3f}%'.format(100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)) plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples') # print chisqg(bins, lognorm.pdf(centers, al, loc=loc, scale=beta)) ax = plt.subplot(111) plt.plot([dists,dists],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='best '+title+' detection') # plt.plot([4.115,4.115],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='Leo P detection at 1.74 Mpc') # plt.plot([3.897,3.897],[-1.0,2.0],'k-', lw=5, alpha=0.6, label='d=417 kpc') # plt.plot([3.317,3.317],[-1.0,2.0],'k-', lw=5, alpha=0.4, label='d=427 kpc') plt.ylim(0,1.1) plt.xlim(2,12) plt.xlabel('$\sigma$ above local mean') plt.ylabel('$P(\sigma = X)$') plt.legend(loc='best', frameon=True) ax.set_aspect(3) # plt.show() plt.savefig(title+'_'+repr(dm)+'_'+repr(fwhm)+'_dist.pdf')
def integrand1(z): return lognorm.pdf(z,1)
def generateDistributionPlotValues(self, specification) : sample_number = 1000 x_values = [] y_values = [] # Generate plot values from selected distribution via PDF distribution = specification['distribution'] if distribution == 'uniform' : lower = specification['settings']['lower'] upper = specification['settings']['upper'] base = upper - lower incr = base/sample_number for i in range(sample_number) : x_values.append(lower+i*incr) y_values = uniform.pdf(x_values, loc=lower, scale=base).tolist() elif distribution == 'normal' : mean = specification['settings']['mean'] std_dev = specification['settings']['std_dev'] x_min = mean - 3*std_dev x_max = mean + 3*std_dev incr = (x_max - x_min)/sample_number for i in range(sample_number) : x_values.append(x_min+i*incr) y_values = norm.pdf(x_values, loc=mean, scale=std_dev).tolist() elif distribution == 'triangular' : a = specification['settings']['a'] base = specification['settings']['b'] - a c_std = (specification['settings']['c'] - a)/base incr = base/sample_number for i in range(sample_number) : x_values.append(a+i*incr) y_values = triang.pdf(x_values, c_std, loc=a, scale=base).tolist() elif distribution == 'lognormal' : lower = specification['settings']['lower'] scale = specification['settings']['scale'] sigma = specification['settings']['sigma'] x_max = lognorm.isf(0.01, sigma, loc=lower, scale=scale) incr = (x_max - lower)/sample_number for i in range(sample_number) : x_values.append(lower+i*incr) y_values = lognorm.pdf(x_values, sigma, loc=lower, scale=scale).tolist() elif distribution == 'beta' : lower = specification['settings']['lower'] base = specification['settings']['upper'] - lower incr = base/sample_number for i in range(sample_number) : x_values.append(lower+i*incr) a = specification['settings']['alpha'] b = specification['settings']['beta'] y_values = beta.pdf(x_values, a, b, loc=lower, scale=base).tolist() # Remove any nan/inf values remove_indexes = [] for i in range(sample_number) : if not np.isfinite(y_values[i]) : remove_indexes.append(i) for i in range(len(remove_indexes)) : x_values = np.delete(x_values, i) y_values = np.delete(y_values, i) return { 'x_values' : x_values, 'y_values' : y_values }
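# A hypothetical specification dict for the 'lognormal' branch of generateDistributionPlotValues
# above (a sketch, not taken from the original code base); the settings keys mirror the ones the
# method reads.
spec = {
    'distribution': 'lognormal',
    'settings': {'lower': 0.0, 'scale': 2.0, 'sigma': 0.5},
}
# plot_values = self.generateDistributionPlotValues(spec)  # -> {'x_values': [...], 'y_values': [...]}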
# ## Recover the HFP histogram option = namedtuple('option', 'n_bins') option.n_bins = round(10 * log(t_)) p, x = HistogramFP(pnl.reshape(1, -1), flex_probs, option) # ## Compute the MMFP pdf # + xx = sort(x) xx = r_[xx, npmax(xx) + arange(0.001, 0.051, 0.001)] m1 = flex_probs @ pnl.T m3 = flex_probs @ ((pnl - m1)**3).T sln = lognorm.pdf(sign(m3) * xx - c, sqrt(sig2), scale=exp(mu)) # fitted pdf date_dt = array([date_mtop(datenum(i)) for i in date]) myFmt = mdates.DateFormatter('%d-%b-%Y') date_tick = arange(200 - 1, t_, 820) # - # ## Generate the figure # + f = figure() # HFP histogram with MMFP pdf superimposed h1 = plt.subplot(3, 1, 1) b = bar(x[:-1], p[0], width=x[1] - x[0],
def valsigi(nu): result = integrate.quad(lambda x: vali(x)*lognorm.pdf(x-log(nu), sig), 1,10) return result[0]
def _pdf(self, x, mu1, mu2, sigma1, sigma2, lamb): return lamb * lognorm.pdf(x, s=sigma1, scale=np.exp(mu1)) +\ (1 - lamb) * lognorm.pdf(x, s=sigma2, scale=np.exp(mu2))
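# A small standalone sketch (not from the original class) re-evaluating the same two-component
# lognormal mixture and checking that it integrates to one for hypothetical parameter values.
import numpy as np
from scipy.integrate import quad
from scipy.stats import lognorm

mu1, mu2, sigma1, sigma2, lamb = 0.0, 1.0, 0.4, 0.6, 0.3
mix_pdf = lambda x: (lamb * lognorm.pdf(x, s=sigma1, scale=np.exp(mu1))
                     + (1 - lamb) * lognorm.pdf(x, s=sigma2, scale=np.exp(mu2)))
total, _ = quad(mix_pdf, 0, np.inf)
print(total)  # ~ 1.0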
def P(mu,data_obsDict,PrettyHugeDict,lnN_matrix,datacards_dict,processes_dict,stats,**kwargs): ff=0 I=0 c=0 plus=0 for datacard in PrettyHugeDict.iterkeys(): # ACHTUNG: "for k" and "for processes" have been interchanged... for k in xrange(0,len(data_obsDict[datacard])):# is there always a process "data_obs" which gives the representative bin number? sk=0 bk=0 for process in PrettyHugeDict[datacard].iterkeys(): bin_k=PrettyHugeDict[datacard][process][stats[datacards_dict[datacard]][processes_dict[datacard][process]]][k] # get the nominal value: mu_k=bin_k[1] # get the statistical sigma: # are the Ups and Downs symmetric around the nominal value? - First look: yes... # bei Wln sind die Ups und Downs auf den Bins durcheinandergeworfen! Der Abstand scheint aber wieder zu stimmen --- abs() sigma_k=abs(bin_k[1]-bin_k[0]) # beim ersten Durchlauf die lnN-priors mit rausholen -> c scale_factors=1 for scale in lnN_matrix.iterkeys(): # lnN-priors dranmultiplizieren if c < len(lnN_matrix): ff+=log(lognorm.pdf(kwargs[scale],1)) if lognorm.pdf(kwargs[scale],1) == 0.0: print lognorm.pdf(kwargs[scale],1) c+=1 # scale_factors generieren xxx=lnN_matrix[scale][datacards_dict[datacard]][processes_dict[datacard][process]] if xxx != "-": scale_factors*=power(kwargs[scale],(float(xxx) - 1.0)) # ------------------------------------------------------------ GBC_sum=0 for syst in PrettyHugeDict[datacard][process]: if not "stat" in syst: # GBC(IntVars[syst]) GBC_sum+=(PrettyHugeDict[datacard][process][syst][k][kwargs[syst]] - mu_k) # sum of the backgrounds (required for the likelihood) kwarg=kwargs[datacard+"-"+process+"-"+str(k+1)] if kwarg == "-": if process != ("ZH" or "WH"): bk+=scale_factors*(mu_k + GBC_sum) else: sk+=scale_factors*(mu_k + GBC_sum) else: # if kwargs[datacard+"-"+process+"-"+str(k+1)] != "-": --- # if sigma_k > 0.1: ff+=log(norm.pdf(kwarg,scale_factors*(mu_k + GBC_sum),sigma_k)) if process != ("ZH" or "WH"): bk+=kwarg else: sk+=kwarg # ------------------------------------------------------------ #sk und bk verarbeiten ff+=log(ExtendedPoisson(data_obsDict[datacard][k],mu,sk,bk)) # Jeffreys-Prior vorbereiten I+=Fisher(mu,sk,bk) # Jeffreys-Prior # I kann stellenweise negativ werden - sollte aber keine Rolle spielen, weil der Poissonian dort eh verschwindet... I=sqrt(abs(I)) ff+=log(I) if ff == -float("Inf"): return 0.0 else: return exp(ff+4300)
def distance_metric(self, statistic='all', verbose=False, plot_kwargs1={ 'color': 'b', 'marker': 'D', 'label': '1' }, plot_kwargs2={ 'color': 'g', 'marker': 'o', 'label': '2' }, save_name=None): ''' Calculate the distance. *NOTE:* The data are standardized before comparing to ensure the distance is calculated on the same scales. Parameters ---------- statistic : 'all', 'hellinger', 'ks', 'lognormal' Which measure of distance to use. labels : tuple, optional Sets the labels in the output plot. verbose : bool, optional Enables plotting. plot_kwargs1 : dict, optional Pass kwargs to `~matplotlib.pyplot.plot` for `dataset1`. plot_kwargs2 : dict, optional Pass kwargs to `~matplotlib.pyplot.plot` for `dataset2`. save_name : str,optional Save the figure when a file name is given. ''' if statistic is 'all': self.compute_hellinger_distance() self.compute_ks_distance() # self.compute_ad_distance() if self._do_fit: self.compute_lognormal_distance() elif statistic is 'hellinger': self.compute_hellinger_distance() elif statistic is 'ks': self.compute_ks_distance() elif statistic is 'lognormal': if not self._do_fit: raise Exception("Fitting must be enabled to compute the" " lognormal distance.") self.compute_lognormal_distance() # elif statistic is 'ad': # self.compute_ad_distance() else: raise TypeError("statistic must be 'all'," "'hellinger', 'ks', or 'lognormal'.") # "'hellinger', 'ks' or 'ad'.") if verbose: import matplotlib.pyplot as plt defaults1 = {'color': 'b', 'marker': 'D', 'label': '1'} defaults2 = {'color': 'g', 'marker': 'o', 'label': '2'} for key in defaults1: if key not in plot_kwargs1: plot_kwargs1[key] = defaults1[key] for key in defaults2: if key not in plot_kwargs2: plot_kwargs2[key] = defaults2[key] if self.normalization_type == "standardize": xlabel = r"z-score" elif self.normalization_type == "center": xlabel = r"$I - \bar{I}$" elif self.normalization_type == "normalize_by_mean": xlabel = r"$I/\bar{I}$" else: xlabel = r"Intensity" # Print fit summaries if using fitting if self._do_fit: try: print(self.PDF1._mle_fit.summary()) except ValueError: warn("Covariance calculation failed. Check the fit quality" " for data set 1!") try: print(self.PDF2._mle_fit.summary()) except ValueError: warn("Covariance calculation failed. Check the fit quality" " for data set 2!") # PDF plt.subplot(121) plt.semilogy(self.bin_centers, self.PDF1.pdf, color=plot_kwargs1['color'], linestyle='none', marker=plot_kwargs1['marker'], label=plot_kwargs1['label']) plt.semilogy(self.bin_centers, self.PDF2.pdf, color=plot_kwargs2['color'], linestyle='none', marker=plot_kwargs2['marker'], label=plot_kwargs2['label']) if self._do_fit: # Plot the fitted model. 
vals = np.linspace(self.bin_centers[0], self.bin_centers[-1], 1000) fit_params1 = self.PDF1.model_params plt.semilogy(vals, lognorm.pdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-') fit_params2 = self.PDF2.model_params plt.semilogy(vals, lognorm.pdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-') plt.grid(True) plt.xlabel(xlabel) plt.ylabel("PDF") plt.legend(frameon=True) # ECDF ax2 = plt.subplot(122) ax2.yaxis.tick_right() ax2.yaxis.set_label_position("right") if self.normalization_type is not None: ax2.plot(self.bin_centers, self.PDF1.ecdf, color=plot_kwargs1['color'], linestyle='-', marker=plot_kwargs1['marker'], label=plot_kwargs1['label']) ax2.plot(self.bin_centers, self.PDF2.ecdf, color=plot_kwargs2['color'], linestyle='-', marker=plot_kwargs2['marker'], label=plot_kwargs2['label']) if self._do_fit: ax2.plot( vals, lognorm.cdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-', ) ax2.plot( vals, lognorm.cdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-', ) else: ax2.semilogx(self.bin_centers, self.PDF1.ecdf, color=plot_kwargs1['color'], linestyle='-', marker=plot_kwargs1['marker'], label=plot_kwargs1['label']) ax2.semilogx(self.bin_centers, self.PDF2.ecdf, color=plot_kwargs2['color'], linestyle='-', marker=plot_kwargs2['marker'], label=plot_kwargs2['label']) if self._do_fit: ax2.semilogx( vals, lognorm.cdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-', ) ax2.semilogx( vals, lognorm.cdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-', ) plt.grid(True) plt.xlabel(xlabel) plt.ylabel("ECDF") plt.tight_layout() if save_name is not None: plt.savefig(save_name) plt.close() else: plt.show() return self
plt.xlabel('Distance (m)') plt.ylim(0, 0.125) n = 1 lognorm_mean_distance = [] lognorm_var_distance = [] lognorm_skew_distance = [] lognorm_kurt_distance = [] for bins in size_nest_distances: if len(bins) > 10: s, loc, scale = lognorm.fit(bins) mean, var, skew, kurt = lognorm.stats(s, loc, scale, moments='mvsk') lognorm_mean_distance.append(mean) lognorm_var_distance.append(var) lognorm_skew_distance.append(skew) lognorm_kurt_distance.append(kurt) pdf = lognorm.pdf(x, s, loc, scale) plt.plot(x, pdf, label='{}'.format( n)) #In this plot I am plotting all distributions together n += 1 plt.legend() plt.figure(7) plt.title('Lognormal distributions of distance bins') plt.ylabel('Sizes (m)') plt.xlabel('Distance bin (m)') n = 1 x = np.arange(0, 200, 0.1) for bins in size_nest_distances: if len(bins) > 10: s, loc, scale = lognorm.fit(bins) pdf = lognorm.pdf(x, s, loc, scale)
def density(self, x): return lognorm.pdf(x, self.s, loc=self.mu, scale=self.sigma)
def iBSOption(Diff0,Var_lnDiff,K,G,Mu_lnG,Var_lnG): d1=(log(Diff0/(K-G)))/sqrt(Var_lnDiff)+0.5*sqrt(Var_lnDiff) d2=d1-sqrt(Var_lnDiff) OptVal=Diff0*norm.cdf(d1,0.,1.)-(K-G)*norm.cdf(d2,0.,1.) return OptVal*lognorm.pdf(G,sqrt(Var_lnG),0,exp(Mu_lnG))
def __init__(self, a, b, n, name, pa=0.1, pb=0.9, lognormal=False, Plot=True): mscale.register_scale(ProbitScale) if Plot: fig = plt.figure(facecolor="white") ax1 = fig.add_subplot(121, axisbelow=True) ax2 = fig.add_subplot(122, axisbelow=True) ax1.set_xlabel(name) ax1.set_ylabel("ECDF and Best Fit CDF") prop = matplotlib.font_manager.FontProperties(size=8) if lognormal: sigma = (log(b) - log(a)) / ( (erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2**0.5)) mu = log(a) - erfinv(2 * pa - 1) * sigma * (2**0.5) cdf = arange(0.001, 1.000, 0.001) ppf = map(lambda v: lognorm.ppf(v, sigma, scale=exp(mu)), cdf) x = lognorm.rvs(sigma, scale=exp(mu), size=n) x.sort() print "generating lognormal %s, p50 %0.3f, size %s" % (name, exp(mu), n) x_s, ecdf_x = ecdf(x) best_fit = lognorm.cdf(x, sigma, scale=exp(mu)) if Plot: ax1.set_xscale('log') ax2.set_xscale('log') hist_y = lognorm.pdf(x_s, std(log(x)), scale=exp(mu)) else: sigma = (b - a) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2**0.5)) mu = a - erfinv(2 * pa - 1) * sigma * (2**0.5) cdf = arange(0.001, 1.000, 0.001) ppf = map(lambda v: norm.ppf(v, mu, scale=sigma), cdf) print "generating normal %s, p50 %0.3f, size %s" % (name, mu, n) x = norm.rvs(mu, scale=sigma, size=n) x.sort() x_s, ecdf_x = ecdf(x) best_fit = norm.cdf((x - mean(x)) / std(x)) hist_y = norm.pdf(x_s, loc=mean(x), scale=std(x)) pass if Plot: ax1.plot(ppf, cdf, 'r-', linewidth=2) ax1.set_yscale('probit') ax1.plot(x_s, ecdf_x, 'o') ax1.plot(x, best_fit, 'r--', linewidth=2) n, bins, patches = ax2.hist(x, normed=1, facecolor='green', alpha=0.75) bincenters = 0.5 * (bins[1:] + bins[:-1]) ax2.plot(x_s, hist_y, 'r--', linewidth=2) ax2.set_xlabel(name) ax2.set_ylabel("Histogram and Best Fit PDF") ax1.grid(b=True, which='both', color='black', linestyle='-', linewidth=1) #ax1.grid(b=True, which='major', color='black', linestyle='--') ax2.grid(True) return
# @Time : 2020/5/14 22:07 # @Author : gzzang # @File : lognorm # @Project : notebook from scipy.stats import lognorm import matplotlib.pyplot as plt import numpy as np import pdb fig, ax = plt.subplots(1, 1) s = 0.954 s = 0.5 mean, var, skew, kurt = lognorm.stats(s, moments='mvsk') x = np.linspace(lognorm.ppf(0.01, s), lognorm.ppf(0.99, s), 100) ax.plot(x, lognorm.pdf(x, s), 'r-', lw=5, alpha=0.6, label='lognorm pdf') rv = lognorm(s) ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf') vals = lognorm.ppf([0.001, 0.5, 0.999], s) print(np.allclose([0.001, 0.5, 0.999], lognorm.cdf(vals, s))) print(np.allclose(a=[1.1000000000001, 1.2], b=[1.1, 1.2])) # pdb.set_trace() r = lognorm.rvs(s, size=1000) sigma = s mu = 0
with open('datafile2.csv') as csvfile2: reader = csv.reader(csvfile2) for row in reader: data2.append(float(row[0])) # sort the data data2 = np.sort(data2) # number for parameters in the distribution for k value for AIC, each one has two parameters need to be estimated num_params = 2 # Parameter estimates for generic data shape1, loc1, scale1 = lognorm.fit(data2, floc=0) mu1 = np.log(scale1) sigma1 = shape1 y1 = lognorm.pdf(data2, s=sigma1, scale=np.exp(mu1)) log_likelihood1 = np.sum(np.log(y1)) print("Lognorm loglikelihood = " + str(log_likelihood1)) aic1= -2 * log_likelihood1 + 2 * num_params print("Lognorm AIC = " + str(aic1)) # https://stackoverflow.com/questions/33070724/determine-weibull-parameters-from-data # Parameter estimates for generic data shape2, loc2, scale2 = weibull_min.fit(data2, floc=0) c = shape2 b = scale2 y2 = weibull_min.pdf(data2, c, scale=b) log_likelihood2 = np.sum(np.log(y2)) print("Weibull loglikelihood = " + str(log_likelihood2)) aic2= -2 * log_likelihood2 + 2 * num_params print("Weibull AIC = " + str(aic2))
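# A small follow-up sketch (not in the original script): the fitted model with the lower AIC is preferred.
best_fit = 'lognormal' if aic1 < aic2 else 'weibull'
print("Preferred model by AIC: " + best_fit)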
def calc_pdf(density=1e-7, L_nu=1e50, sigma=1, gamma=2.19, logMu_range=[-10, 6], N_Mu_bins=200, z_limits=[0.04, 10.], nzbins=120, Lum_limits=[1e45, 1e54], nLbins=120, flux_to_mu=10763342917.859608): """ Parameter: - density in 1/Mpc^3 - L_nu in erg/yr - sigma in dex - gamma - flux_to_mu Integration Parameters - logMu_range = [-10,6] # expected range in log mu, - N_Mu_bins = 200 # number of bins for log nu histogram - z_limits = [0.04, 10.] # Redshift limits - nzbins = 120 # number of z bins - Lum_limits = [1e45,1e54] # Luminosity limits - nLbins = 120 # number of logLuminosity bins """ # Conversion Factors Mpc_to_cm = 3.086e+24 erg_to_GeV = 624.151 year2sec = 365 * 24 * 3600 cosmology = {'omega_M_0': 0.308, 'omega_lambda_0': 0.692, 'h': 0.678} cosmology = set_omega_k_0(cosmology) # Flat universe ### Define the Redshift and Luminosity Evolution redshift_evolution = lambda z: HopkinsBeacom2006StarFormationRate(z) LF = lambda z, logL: redshift_evolution(z)*np.log(10)*10**logL * \ lognorm.pdf(10**logL, np.log(10)*sigma, scale=L_nu*np.exp(-0.5*(np.log(10)*sigma)**2)) N_tot, int_norm = tot_num_src(redshift_evolution, cosmology, z_limits[-1], density) print "Total number of sources {:.0f} (All-Sky)".format(N_tot) # Setup Arrays logMu_array = np.linspace(logMu_range[0], logMu_range[1], N_Mu_bins) Flux_from_fixed_z = [] zs = np.linspace(z_limits[0], z_limits[1], nzbins) deltaz = (float(z_limits[1]) - float(z_limits[0])) / nzbins Ls = np.linspace(np.log10(Lum_limits[0]), np.log10(Lum_limits[1]), nLbins) deltaL = (np.log10(Lum_limits[1]) - np.log10(Lum_limits[0])) / nLbins # Integration t0 = time.time() Count_array = np.zeros(N_Mu_bins) muError = [] tot_bins = nLbins * nzbins print('Starting Integration...Going to evaluate {} bins'.format(tot_bins)) N_sum = 0 Flux_from_fixed_z.append([]) print "-" * 20 # Loop over redshift bins for z_count, z in enumerate(zs): # Conversion Factor for given z bz = calc_conversion_factor(z, gamma) dlz = luminosity_distance(z, **cosmology) tot_flux_from_z = 0. # Loop over Luminosity bins for l_count, lum in enumerate(Ls): run_id = z_count * nLbins + l_count if run_id % (tot_bins / 10) == 0.: print "{}%".format(100 * run_id / tot_bins) # Number of Sources in dN = calc_dN(LF, lum, z, deltaL, deltaz, N_tot, int_norm, cosmology) N_sum += dN #Flux to Source Strength logmu = np.log10(flux_to_mu * erg_to_GeV * 10**lum / year2sec / (4 * np.pi * (Mpc_to_cm * dlz)**2) * bz) # Add dN to Histogram if logmu < logMu_range[1] and logmu > logMu_range[0]: tot_flux_from_z += dN * 10**logmu idx = int((logmu - logMu_range[0]) * N_Mu_bins / (logMu_range[1] - logMu_range[0])) Count_array[idx] += dN else: muError.append(logmu) Flux_from_fixed_z.append(tot_flux_from_z) print "Number of Mu out of Range: {}".format(len(muError)) print "Num Sou {}".format(N_sum) t1 = time.time() print "-" * 20 print "\n Time needed for {}x{} bins: {}s".format(nzbins, nLbins, int(t1 - t0)) return logMu_array, Count_array, zs, Flux_from_fixed_z
def log_normal_pdf(x): ''' Function to calculate PDF of a log-normal distribution with mu=1.62 and sigma=0.42 at a given x ''' return lognorm.pdf(x, 0.42, scale=math.exp(1.62))
def PDF(pts, mu, sigma, distribuition, outlier=0, data=0, seed=None): from scipy.stats import norm, lognorm import numpy as np from scipy.interpolate import interp1d from someFunctions import ash eps = 5e-5 if distribuition == 'normal': outlier_inf = outlier_sup = outlier if not data: inf, sup = norm.interval(0.9999, loc=mu, scale=sigma) X1 = np.linspace(inf - outlier, mu, int(1e6)) Y1 = norm.pdf(X1, loc=mu, scale=sigma) interp = interp1d(Y1, X1) y1 = np.linspace(Y1[0], Y1[-1], pts // 2 + 1) x1 = interp(y1) X2 = np.linspace(mu, sup + outlier, int(1e6)) Y2 = norm.pdf(X2, loc=mu, scale=sigma) interp = interp1d(Y2, X2) y2 = np.flip(y1, 0) x2 = interp(y2) else: np.random.set_state(seed) d = np.random.normal(mu, sigma, data) inf, sup = min(d) - outlier_inf, max(d) + outlier_sup #yest,xest = np.histogram(d,bins = 'fd',normed = True) xest, yest = ash(d) xest = np.mean(np.array([xest[:-1], xest[1:]]), 0) M = np.where(yest == max(yest))[0][0] m = np.where(yest == min(yest))[0][0] interpL = interp1d(yest[:M + 1], xest[:M + 1], assume_sorted=False, fill_value='extrapolate') interpH = interp1d(yest[M:], xest[M:], assume_sorted=False, fill_value='extrapolate') y1 = np.linspace(yest[m] + eps, yest[M], pts // 2 + 1) x1 = interpL(y1) y2 = np.flip(y1, 0) x2 = interpH(y2) elif distribuition == 'lognormal': outlier_inf = 0 outlier_sup = outlier inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=np.exp(mu)) inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu)) inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu)) if not data: mode = np.exp(mu - sigma**2) X1 = np.linspace(inf - outlier_inf, mode, int(1e6)) Y1 = lognorm.pdf(X1, sigma, loc=0, scale=np.exp(mu)) interp = interp1d(Y1, X1) y1 = np.linspace(Y1[0], Y1[-1], pts // 2 + 1) x1 = interp(y1) X2 = np.linspace(mode, sup + outlier_sup, int(1e6)) Y2 = lognorm.pdf(X2, sigma, loc=0, scale=np.exp(mu)) interp = interp1d(Y2, X2) y2 = np.flip(y1, 0) x2 = interp(y2) else: np.random.set_state(seed) d = np.random.lognormal(mu, sigma, data) #inf,sup = min(d)-outlier_inf,max(d)+outlier_sup #yest,xest = np.histogram(d,bins = 'fd',normed = True) #xest = np.mean(np.array([xest[:-1],xest[1:]]),0) xest, yest = ash(d) yest = yest[xest < sup] xest = xest[xest < sup] M = np.where(yest == max(yest))[0][0] m = np.where(yest == min(yest))[0][0] interpL = interp1d(yest[:M + 1], xest[:M + 1], fill_value='extrapolate') interpH = interp1d(yest[M:], xest[M:]) y1 = np.linspace(yest[m] + eps, yest[M], pts // 2 + 1) x1 = interpL(y1) y2 = np.flip(y1, 0) x2 = interpH(y2) X = np.concatenate([x1[:-1], x2]) Y = np.concatenate([y1[:-1], y2]) return X, Y
def decide_parameters_and_distribution(disname):
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import (norm, expon, gamma, beta, cauchy, lognorm,
                             pareto, weibull_min)
    params_lst = []
    cont = 1
    if disname == "norm":
        while True:
            m, s = map(float, input("m: mean, s: standard deviation: ").split())
            params_lst.append([m, s])
            cont = int(input("0: quit, any other number: continue "))
            if cont == 0:
                break
        for param in params_lst:
            mu = param[0]
            s = param[1]
            X = np.arange(start=mu - 3 * s, stop=mu + 3 * s, step=0.1)
            norm_pdf = norm.pdf(x=X, loc=mu, scale=s)
            plt.plot(X, norm_pdf, label="mu={},sigma={}".format(mu, s))
        plt.legend()
        plt.show()
        return

    if disname == "expon":
        while True:
            lam = float(input("lam: scale (mean): "))
            params_lst.append(lam)
            cont = int(input("0: quit: "))
            if cont == 0:
                break
        for param in params_lst:
            lam = param
            X = np.arange(start=-1, stop=15, step=0.1)
            expon_pdf = expon.pdf(x=X, scale=lam)
            plt.plot(X, expon_pdf, label="scale={}".format(lam))
        plt.legend()
        plt.show()
        return

    if disname == "gamma":
        while True:
            k, theta = map(float, input("k: shape, theta: scale: ").split())
            params_lst.append([k, theta])
            cont = int(input("0: quit: "))
            if cont == 0:
                break
        for param in params_lst:
            k = param[0]
            theta = param[1]
            X = np.arange(start=-1, stop=k * (theta**2), step=0.1)
            gamma_pdf = gamma.pdf(x=X, a=k, scale=theta)
            plt.plot(X, gamma_pdf, label="k={},theta={}".format(k, theta))
        plt.legend()
        plt.show()
        return

    if disname == "beta":
        while True:
            a, b = map(float, input("a: shape parameter, b: shape parameter: ").split())
            params_lst.append([a, b])
            cont = int(input("0: quit: "))
            if cont == 0:
                break
        for param in params_lst:
            a = param[0]
            b = param[1]
            X = np.arange(start=0, stop=1, step=0.01)
            beta_pdf = beta.pdf(x=X, a=a, b=b)
            plt.plot(X, beta_pdf, label="a={},b={}".format(a, b))
        plt.legend()
        plt.show()
        return

    if disname == "cauchy":
        X = np.arange(start=-2, stop=2, step=0.1)
        cauchy_pdf = cauchy.pdf(x=X)
        plt.plot(X, cauchy_pdf)
        plt.legend()
        plt.show()
        return

    if disname == "log_normal":
        while True:
            m, s = map(float, input("m: mean of log, s: std of log: ").split())
            params_lst.append([m, s])
            cont = int(input("0: quit, any other number: continue "))
            if cont == 0:
                break
        for param in params_lst:
            mu = param[0]
            s = param[1]
            X = np.arange(start=0, stop=mu + 3 * s, step=0.1)
            lognorm_pdf = lognorm.pdf(x=X, s=s, scale=np.exp(mu))
            plt.plot(X, lognorm_pdf, label="mu={},sigma={}".format(mu, s))
        plt.legend()
        plt.show()
        return

    if disname == "pareto":
        while True:
            b, s = map(float, input("b: shape, s: scale: ").split())
            params_lst.append([b, s])
            cont = int(input("0: quit, any other number: continue "))
            if cont == 0:
                break
        for param in params_lst:
            b = param[0]
            s = param[1]
            X = np.arange(start=s, stop=s * 5, step=0.1)
            pareto_pdf = pareto.pdf(x=X, b=b, scale=s)
            plt.plot(X, pareto_pdf, label="b={},scale={}".format(b, s))
        plt.legend()
        plt.show()
        return

    if disname == "wible":  # Weibull
        while True:
            a, b = map(float, input("a: shape parameter, b: scale parameter: ").split())
            params_lst.append([a, b])
            cont = int(input("0: quit, any other number: continue "))
            if cont == 0:
                break
        for param in params_lst:
            a = param[0]
            b = param[1]
            X = np.arange(start=0, stop=a + 3 * b, step=0.1)
            weibull_pdf = weibull_min.pdf(x=X, c=a, scale=b)
            plt.plot(X, weibull_pdf, label="c={},scale={}".format(a, b))
        plt.legend()
        plt.show()
        return
def lognorm_pdf(x, mean, std):
    # scipy's signature is lognorm.pdf(x, s, loc, scale): here "std" is passed as the
    # shape parameter s and "mean" as the scale parameter exp(mu), i.e. the median.
    dist_pdf = lognorm.pdf(x, std, 0, mean)
    return dist_pdf
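# Quick check (standalone) of the positional order relied on above: scipy's signature is
# lognorm.pdf(x, s, loc, scale), so the third positional argument is loc and the fourth
# is scale.
from scipy.stats import lognorm

x, mean, std = 2.0, 1.5, 0.4
assert lognorm.pdf(x, std, 0, mean) == lognorm.pdf(x, s=std, loc=0, scale=mean)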
def pdf(self, x): return lognorm.pdf(x, 0.3, 0, 2)
def expected(x, i=-1): return lognorm.pdf(x, 1)
def pdf(self, x): return lognorm.pdf(x, self.shape, self.loc, self.scale)
def fn_onstatetime_hist(file_name, folder, mean_onstatetime, distribution):
    """
    Plots a histogram of mean on-state times and fits a lognormal (or normal) distribution
    Inputs: data, filename and foldername which should be defined in the script
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import lognorm, norm
    from pylab import text
    n_molecules = len(mean_onstatetime)
    #Plot on-state time histogram
    figure_name = file_name + '_onstatetime'
    ax = plt.subplot(111)
    num_bins = np.linspace(int(min(mean_onstatetime)),
                           int(np.mean(mean_onstatetime)),
                           int(np.sqrt(len(mean_onstatetime)) * 8))
    ax.hist(mean_onstatetime, bins=num_bins, density=True, color='forestgreen', edgecolor='black')
    #Choose distribution
    if distribution == 'lognormal':
        #Fit lognormal curve
        sigma, loc, mean = lognorm.fit(mean_onstatetime, floc=0)
        pdf = lognorm.pdf(num_bins, sigma, loc, mean)  #sigma=shape, mu=np.log(scale)
        ax.plot(num_bins, pdf, 'k', linestyle='--')
    elif distribution == 'normal':
        #Fit normal curve
        mean, std = norm.fit(mean_onstatetime)
        pdf = norm.pdf(num_bins, mean, std)
        ax.plot(num_bins, pdf, 'k', linestyle='--')
    #Edit plot
    plt.xlabel('Mean on-state time (s)', fontname='Arial', fontsize=12)
    plt.ylabel('Probability density', fontname='Arial', fontsize=12)
    plt.xticks(fontname='Arial', fontsize=12)
    plt.yticks(fontname='Arial', fontsize=12)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    text(0.75, 0.95, 'μ=' + str(round(mean, 2)) + ' s',
         horizontalalignment='center', verticalalignment='center',
         transform=ax.transAxes, fontname='Arial', fontsize=12)
    text(0.40, 0.95, 'N=' + str(n_molecules),
         horizontalalignment='center', verticalalignment='center',
         transform=ax.transAxes, fontname='Arial', fontsize=12)
    plt.savefig(folder + '/Figures/PDFs' + '/' + figure_name + '.pdf', dpi=500)
    plt.savefig(folder + '/Figures/PNGs' + '/' + figure_name + '.png', dpi=500)
    return (plt.show())
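# Minimal illustration (synthetic data, standalone sketch) of the fit used above:
# with floc=0, lognorm.fit returns (shape, loc, scale), where the shape is the standard
# deviation of log(x) and log(scale) is its mean.
import numpy as np
from scipy.stats import lognorm

rng = np.random.default_rng(0)
samples = rng.lognormal(mean=1.0, sigma=0.3, size=5000)
sigma_hat, loc_hat, scale_hat = lognorm.fit(samples, floc=0)
print(sigma_hat, np.log(scale_hat))  # should come out close to 0.3 and 1.0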
def __init__(self, a, b, n, name, pa=0.1, pb=0.9, lognormal=False, Plot=True):
    mscale.register_scale(ProbitScale)
    if Plot:
        fig = plt.figure(facecolor="white")
        ax1 = fig.add_subplot(121, axisbelow=True)
        ax2 = fig.add_subplot(122, axisbelow=True)
        ax1.set_xlabel(name)
        ax1.set_ylabel("ECDF and Best Fit CDF")
        prop = matplotlib.font_manager.FontProperties(size=8)

    if lognormal:
        sigma = (log(b) - log(a)) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
        mu = log(a) - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
        cdf = arange(0.001, 1.000, 0.001)
        ppf = list(map(lambda v: lognorm.ppf(v, sigma, scale=exp(mu)), cdf))
        x = lognorm.rvs(sigma, scale=exp(mu), size=n)
        x.sort()
        print("generating lognormal %s, p50 %0.3f, size %s" % (name, exp(mu), n))
        x_s, ecdf_x = ecdf(x)
        best_fit = lognorm.cdf(x, sigma, scale=exp(mu))
        if Plot:
            ax1.set_xscale("log")
            ax2.set_xscale("log")
        hist_y = lognorm.pdf(x_s, std(log(x)), scale=exp(mu))
    else:
        sigma = (b - a) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
        mu = a - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
        cdf = arange(0.001, 1.000, 0.001)
        ppf = list(map(lambda v: norm.ppf(v, mu, scale=sigma), cdf))
        print("generating normal %s, p50 %0.3f, size %s" % (name, mu, n))
        x = norm.rvs(mu, scale=sigma, size=n)
        x.sort()
        x_s, ecdf_x = ecdf(x)
        best_fit = norm.cdf((x - mean(x)) / std(x))
        hist_y = norm.pdf(x_s, loc=mean(x), scale=std(x))

    if Plot:
        ax1.plot(ppf, cdf, "r-", linewidth=2)
        ax1.set_yscale("probit")
        ax1.plot(x_s, ecdf_x, "o")
        ax1.plot(x, best_fit, "r--", linewidth=2)
        n, bins, patches = ax2.hist(x, density=True, facecolor="green", alpha=0.75)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        ax2.plot(x_s, hist_y, "r--", linewidth=2)
        ax2.set_xlabel(name)
        ax2.set_ylabel("Histogram and Best Fit PDF")
        ax1.grid(b=True, which="both", color="black", linestyle="-", linewidth=1)
        # ax1.grid(b=True, which='major', color='black', linestyle='--')
        ax2.grid(True)
    return
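# Verification sketch (standalone, assumed example values for a, b, pa, pb) of the
# percentile matching used above: the derived sigma and mu make the log-normal's
# pa- and pb-quantiles equal to a and b.
import numpy as np
from scipy.special import erfinv
from scipy.stats import lognorm

a, b, pa, pb = 10.0, 100.0, 0.1, 0.9
sigma = (np.log(b) - np.log(a)) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * 2**0.5)
mu = np.log(a) - erfinv(2 * pa - 1) * sigma * 2**0.5
assert np.isclose(lognorm.ppf(pa, sigma, scale=np.exp(mu)), a)
assert np.isclose(lognorm.ppf(pb, sigma, scale=np.exp(mu)), b)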
flowsdf = pd.read_csv(flows_data_file_path) flowsdf.columns = ["Detector", "Flow"] #flowsdf.columns = ["Detector","Flow","Speed"] #flowsdf = flowsdf[["Detector","Flow"]] taz_root = get_taz_xml_root(camerasdf) #camerasdf should have closest edge taz_file_path = norm_path(path_join(origin_path, "wdc.taz.xml")) with open(taz_file_path, 'w') as f: f.write(etree.tostring(taz_root, pretty_print=True)) for _u in mu: for _sig in sigma: lnpdf = lambda x: lognorm.pdf(x, s=_sig, loc=0, scale=exp(_u)) #b = np.array(get_b_vector_lognorm(flowsdf, distdf, lnpdf, 30)) # we use lognormal results, 30 for 30 minutes b = np.array(get_b_vector_lognorm(flowsdf, distdf, lnpdf)) for _k in K: parameter_config_str = str(no_of_cameras) + "cameras_u" + str(_u) + \ "_std" + str(_sig) + "_" + str(_k) + "sp" kshortest_paths_dump_path = norm_path( path_join( origin_path, str(no_of_cameras) + "_cameras_" + str(_k) + "_shortest_paths.dump")) a = get_k_shortest_paths(kshortest_paths_dump_path, no_of_cameras, _k, G)
from scipy.stats import lognorm
import matplotlib.pyplot as plt
import numpy as np

# Distribution parameters
vtMu = [0, 1, 2]    # Mean values of the underlying Gaussian
vtVar = [1, 5, 13]  # Variance values of the underlying Gaussian
x = np.arange(0.01, 20, 0.1)  # log-normal support is x > 0

# Vary the mean and plot the curves
plt.figure(1, [15, 5])
sigma = np.sqrt(vtVar[0])
for il in range(0, len(vtMu)):
    mu = vtMu[il]
    plt.subplot(2, 2, 1)
    plt.plot(x, lognorm.pdf(x, s=sigma, scale=np.exp(mu)), label='Mean = {}'.format(mu))
    plt.subplot(2, 2, 2)
    plt.plot(x, lognorm.cdf(x, s=sigma, scale=np.exp(mu)), label='Mean = {}'.format(mu))

# Vary the variance and plot the curves
mu = vtMu[0]
for il in range(0, len(vtVar)):
    sigma = np.sqrt(vtVar[il])
    plt.subplot(2, 2, 3)
    plt.plot(x, lognorm.pdf(x, s=sigma, scale=np.exp(mu)), label=r'$\sigma$ = {:01.2f}'.format(sigma))
    plt.subplot(2, 2, 4)
    plt.plot(x, lognorm.cdf(x, s=sigma, scale=np.exp(mu)), label=r'$\sigma$ = {:01.2f}'.format(sigma))
def ProcessPrior(Prior, AllObs, DAll, Obs, D, ShowFigs, E, DebugMode): #%% 1 handle input prior information #% note that A0min is refined for inclusion in the "jmp" variable at the bottom allA0min = empty((DAll.nR, 1)) for i in range(0, DAll.nR): if min(AllObs.dA[i, :]) >= 0: allA0min[i, 0] = 1e-3 else: allA0min[i, 0] = -min(AllObs.dA[i, :]) + 1e-3 Obs.hmin = Obs.h.min(1) AllObs.hmin = AllObs.h.min(1) A0u = ones((DAll.nR, 1)) * 0.27 * (Prior.meanQbar**.39) * 7.2 * ( Prior.meanQbar**0.5) #Moody & Troutman A0 #%% 2 friction coefficient meanx1 = empty((D.nR, 1)) meanna = empty((D.nR, 1)) for r in range(0, DAll.nR): if E.nOpt == 3: meanx1[r] = -0.1 meanna[r] = 0.04 covx1 = 0.25 covna = 0.05 elif E.nOpt == 4: meanx1[r] = -0.25 covx1 = 1 meanna[r] = 0.04 covna = .05 elif E.nOpt == 5: covd = 0.3 #Moody and troutman meanx1[r] = A0u[r] / mean(AllObs.w[r, :]) * covd covx1 = 0.5 meanna[r] = 0.03 covna = 0.05 #%% 3 initial probability calculations v = (covna * meanna)**2 [mun, sigman] = logninvstat(meanna, v) v = (covx1 * meanx1)**2 [mux1, sigmax1] = logninvstat(meanx1, v) v = (Prior.covQbar * Prior.meanQbar)**2 [muQbar, sigmaQbar] = logninvstat(Prior.meanQbar, v) #%% chain setup N = int(1e4) if DebugMode: N = int(1e3) Nburn = int(N * .2) for r in range(0, D.nR): if A0u[r] < allA0min[r]: A0u[r] = allA0min[r] + 1 nau = meanna x1u = meanx1 z1 = randn(DAll.nR, N) z2 = randn(DAll.nR, N) z3 = randn(DAll.nR, N) u1 = rand(DAll.nR, N) u2 = rand(DAll.nR, N) u3 = rand(DAll.nR, N) na1 = zeros((D.nR, 1)) na2 = zeros((D.nR, 1)) na3 = zeros((D.nR, 1)) thetaAllA0 = empty((DAll.nR, N)) for r in range(0, DAll.nR): thetaAllA0[r, 0] = A0u[r] thetana = empty((DAll.nR, N)) for r in range(0, DAll.nR): thetana[r, 0] = nau[r] thetax1 = empty((DAll.nR, N)) for r in range(0, DAll.nR): thetax1[r, 0] = x1u[r] thetaQ = empty((DAll.nR, N)) f = empty((DAll.nR, N)) jstdA0s = empty((D.nR, N)) jstdnas = empty((D.nR, N)) jstdx1s = empty((D.nR, N)) #%% chain calculations tic = time.process_time() for j in range(0, DAll.nR): # for j in range(0,1): print("Processing prior for reach", j + 1, "/", D.nR, ".") A0u = thetaAllA0[j, 0] nau = thetana[j, 0] x1u = thetax1[j, 0] jstdA0 = A0u jstdna = nau jstdx1 = 0.1 * x1u jtarget = 0.5 Au = A0u + AllObs.dA[j, :] Abaru = median(Au) if Prior.Geomorph.Use: pu1A = lognorm.pdf(Abaru, Prior.Geomorph.logA0_sigma, 0, exp(Prior.Geomorph.logA0_hat)) else: pu1A = 1 pu1 = 1 pu2 = lognorm.pdf(nau, sigman[j], 0, exp(mun[j])) if E.nOpt < 5: pu3 = lognorm.pdf(-x1u, sigmax1[j], 0, exp(mux1[j])) elif E.nOpt == 5: pu3 = lognorm.pdf(x1u, sigmax1[j], 0, exp(mux1[j])) nhatu = calcnhat(AllObs.w[j, :], AllObs.h[j, :], AllObs.hmin[j], A0u + AllObs.dA[j, :], x1u, nau, E.nOpt) Qu = mean(1 / nhatu * (Au)**(5 / 3) * AllObs.w[j, :]**(-2 / 3) * AllObs.S[j, :]**0.5) fu = lognorm.pdf(Qu, sigmaQbar, 0, exp(muQbar)) for i in range(0, N): #adaptation if i < N * 0.2 and i > 0 and i % 100 == 0: jstdA0 = mean(jstdA0s[j, 0:i - 1]) / jtarget * (na1[j] / i) jstdna = mean(jstdnas[j, 0:i - 1]) / jtarget * (na2[j] / i) jstdx1 = mean(jstdx1s[j, 0:i - 1]) / jtarget * (na3[j] / i) jstdA0s[j, i] = jstdA0 #this part is very messy jstdnas[j, i] = jstdna jstdx1s[j, i] = jstdx1 #A0 A0v = A0u + z1[j, i] * jstdA0 Av = A0v + AllObs.dA[j, :] Abarv = median(Av) if A0v < allA0min[j]: pv1 = 0 fv = 0 pv1A = 0 else: pv1 = 1 Qv = mean(1 / nhatu * (Av)**(5 / 3) * AllObs.w[j, :]**(-2 / 3) * AllObs.S[j, :]**0.5) fv = lognorm.pdf(Qv, sigmaQbar, 0, exp(muQbar)) if Prior.Geomorph.Use: pv1A = lognorm.pdf(Abarv, Prior.Geomorph.logA0_sigma, 0, 
exp(Prior.Geomorph.logA0_hat)) else: pv1A = 1 MetRatio = fv / fu * pv1 / pu1 * pv1A / pu1A if MetRatio > u1[j, i]: na1[j] = na1[j] + 1 A0u = A0v Au = Av Qu = Qv fu = fv pu1 = pv1 pu1A = pv1A #na nav = nau + z2[j, i] * jstdna if nav <= 0: pv2 = 0 else: pv2 = lognorm.pdf(nav, sigman[j], 0, exp(mun[j])) nhatv = calcnhat(AllObs.w[j, :], AllObs.h[j, :], AllObs.hmin[j], A0u + AllObs.dA[j, :], x1u, nav, E.nOpt) Qv = mean(1 / nhatv * (Au)**(5 / 3) * AllObs.w[j, :]**(-2 / 3) * AllObs.S[j, :]**0.5) fv = lognorm.pdf(Qv, sigmaQbar, 0, exp(muQbar)) MetRatio = fv / fu * pv2 / pu2 if MetRatio > u2[j, i]: na2[j] = na2[j] + 1 nau = nav Qu = Qv fu = fv pu2 = pv2 #x1 x1v = x1u + z3[j, i] * jstdx1 if E.nOpt < 5: if x1v >= 0: pv3 = 0 else: pv3 = lognorm.pdf(-x1v, sigmax1[j], 0, exp(mux1[j])) elif E.nOpt == 5: if x1v < 0: pv3 = 0 else: pv3 = lognorm.pdf(x1v, sigmax1[j], 0, exp(mux1[j])) nhatv = calcnhat(AllObs.w[j, :], AllObs.h[j, :], AllObs.hmin[j], A0u + AllObs.dA[j, :], x1v, nau, E.nOpt) Qv = mean(1 / nhatv * (Au)**(5 / 3) * AllObs.w[j, :]**(-2 / 3) * AllObs.S[j, :]**0.5) fv = lognorm.pdf(Qv, sigmaQbar, 0, exp(muQbar)) MetRatio = fv / fu * pv3 / pu3 if MetRatio > u3[j, i]: na3[j] = na3[j] + 1 x1u = x1v Qu = Qv fu = fv pu3 = pv3 thetaAllA0[j, i] = A0u thetana[j, i] = nau thetax1[j, i] = x1u thetaQ[j, i] = Qu f[j, i] = fu toc = time.process_time() print('Prior MCMC Time: %.2fs' % (toc - tic)) #%% 4. Calculating final prior parameters Prior.meanAllA0 = mean(thetaAllA0[:, Nburn + 1:N], axis=1) Prior.stdAllA0 = std(thetaAllA0[:, Nburn + 1:N], axis=1) Prior.meanna = mean(thetana[:, Nburn + 1:N], axis=1) Prior.stdna = std(thetana[:, Nburn + 1:N], axis=1) Prior.meanx1 = mean(thetax1[:, Nburn + 1:N], axis=1) Prior.stdx1 = std(thetax1[:, Nburn + 1:N], axis=1) #%% 5. calculate minimum values for A0 for the estimation window #5.1 calculate minimum values for A0 for the estimation window estA0min = empty((D.nR, 1)) for i in range(0, D.nR): if min(Obs.dA[i, :]) >= 0: estA0min[i, :] = 0 else: estA0min[i, :] = -min(Obs.dA[i, :]) #5.3 shift the "all" A0 into the estimate window AllObs.A0Shift = AllObs.dA[:, E.iEst[ 0]] #different than the Matlab version... should be ok? #5.4 save the more restrictive limit Amin = 1 #this is the lowest value that we will let A0+dA take jmp = Jump() jmp.A0min = maximum(allA0min.T + AllObs.A0Shift, estA0min.T) + Amin jmp.nmin = 0.001 #5.5 set up prior A0 variable by shifting into estimation window Prior.meanA0 = Prior.meanAllA0 + AllObs.A0Shift Prior.stdA0 = Prior.stdAllA0 return Prior, jmp
target_seq = []
i = 0
id_list = list(record_dict.keys())
seq_num = len(id_list)
while read_length_count <= target_read_length:
    print(read_length_count)
    if i == seq_num:
        i = 0
    record_id = id_list[i]
    record = record_dict[record_id]
    rand = random.random()
    if not read_dict[record.id]:
        print(record.id)
        dist_value = lognorm.pdf(len(record.seq), sigma, loc, scale)
        if 0 <= rand <= ratio * dist_value / background:
            # print(ratio * dist_value / background)
            target_seq.append(record)
            read_dict[record.id] = True
            read_length_count += len(record.seq)
    i += 1

SeqIO.write(target_seq, "D:/Data/20161229/target.fastq", "fastq")
"""
x_fit = np.linspace(data.min(), data.max(), 100)
pdf_fitted = lognorm.pdf(x_fit, sigma, loc, scale)
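# Hedged sketch (standalone, assumed parameters) of the acceptance rule in the loop
# above: a read is kept with probability proportional to the target log-normal density
# of its length, normalised here by the density at the mode so the probability stays <= 1.
import math
import random
from scipy.stats import lognorm

sigma, loc, scale = 0.5, 0, 2000.0  # assumed fitted length distribution
background = lognorm.pdf(scale * math.exp(-sigma**2), sigma, loc, scale)  # density at the mode
ratio = 1.0

def accept(length):
    return random.random() <= ratio * lognorm.pdf(length, sigma, loc, scale) / background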
sigma = 0.03
T = 10

fig, ax = plt.subplots()
n, bins, patches = ax.hist(rand_nums[:, 0], bins=num_bins, density=True)
ax.plot(bins, norm.pdf(bins), '-')
fig.suptitle('Standard normal distribution for t=0')

#%%
# The distribution of the account value at ``t=120`` follows a log-normal distribution.
# In the expression below, :math:`S_{T}` and :math:`S_{0}` denote the account value
# at ``t=T=120`` and ``t=0`` respectively.
#
# .. math::
#
#     \ln\frac{S_{T}}{S_{0}}\sim\phi\left[\left(r-\frac{\sigma^{2}}{2}\right)T, \sigma\sqrt{T}\right]
#
# The graph shows how well the distribution of :math:`e^{-rT}S_{T}`, the account
# values at ``t=T`` discounted back to ``t=0``, fits the PDF of a log-normal distribution.
#
# Reference: *Options, Futures, and Other Derivatives* by John C. Hull
#
# .. seealso::
#
#     * :doc:`/libraries/notebooks/savings/savings_example1` notebook in the :mod:`~savings` library

fig, ax = plt.subplots()
n, bins, patches = ax.hist(pv_avs, bins=num_bins, density=True)
ax.plot(bins, lognorm.pdf(bins, sigma * T**0.5, scale=S0), '-')
fig.suptitle('PV of account value at t=120')
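# Self-contained sketch of the check above, with assumed values for S0, r, sigma and T
# (the original example defines its own): the discounted terminal value exp(-rT)*S_T of a
# geometric Brownian motion is log-normal with shape sigma*sqrt(T) and scale
# S0*exp(-0.5*sigma**2*T), which is approximately S0 for small sigma, as plotted above.
import numpy as np
from scipy.stats import lognorm

S0, r, sigma, T = 1000.0, 0.02, 0.03, 10
z = np.random.standard_normal(100000)
S_T = S0 * np.exp((r - 0.5 * sigma**2) * T + sigma * np.sqrt(T) * z)
pv = np.exp(-r * T) * S_T

x = np.linspace(pv.min(), pv.max(), 200)
density = lognorm.pdf(x, sigma * np.sqrt(T), scale=S0 * np.exp(-0.5 * sigma**2 * T))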
logmu = math.log((mu**2) / math.sqrt(var + mu**2))  # mean of log(flow): log-scale location
logsigma = math.sqrt(math.log(var / (mu**2) + 1))   # std dev of log(flow): shape parameter
expmu = math.exp(logmu)                             # scale parameter for scipy's lognorm
print(mu, var, sd, logmu, logsigma, expmu)

# Converting the log-scale parameters logmu and logsigma of a lognormal distribution back to mean and variance
#logmu = 4.45142783611     # the bigger the lower peak probability and longer tail
#logsigma = 0.554513029376 # the bigger the lower peak probability and longer tail
#expmu = math.exp(logmu)
#mu = math.exp(logmu + logsigma**2/2)
#var = (math.exp(logsigma**2) - 1) * math.exp(2*logmu + logsigma**2)
#sd = math.sqrt(var)
#print(mu, var, sd, logmu, logsigma)

# loc stays 0: the distribution is fully described by the shape logsigma and the scale exp(logmu)
plt.plot(inteq, lognorm.pdf(inteq, logsigma, loc=0, scale=expmu), lw=6, ls='solid',
         color='blue', alpha=0.6, label='Climate Scenario A1')
plt.axis([QCMIN, QCMAX, 0.0, 0.01])
plt.xlabel('Annual Flood Flow (m^3/s)')
plt.ylabel('Probability')
#plt.title('Lognormal Distribution of Annual Flood Flow')
#plt.axis('off')  # turns off the axis lines and labels
#http://matplotlib.org/api/pyplot_api.html
#linewidth or lw    float value in points
#linestyle or ls    ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

# If log(x) is normally distributed with mean logmu and variance logsigma**2,
# then x is log-normally distributed with shape parameter logsigma and scale parameter exp(logmu).
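# Round-trip check (standalone sketch) of the moment matching above: converting the mean
# and variance of the flow to (logmu, logsigma) and back via the standard log-normal
# moment formulas recovers the original values.
import math

def lognormal_params_from_moments(mu, var):
    logmu = math.log(mu**2 / math.sqrt(var + mu**2))
    logsigma = math.sqrt(math.log(var / mu**2 + 1))
    return logmu, logsigma

mu, var = 120.0, 900.0  # assumed example moments
logmu, logsigma = lognormal_params_from_moments(mu, var)
assert math.isclose(math.exp(logmu + logsigma**2 / 2), mu)
assert math.isclose((math.exp(logsigma**2) - 1) * math.exp(2 * logmu + logsigma**2), var)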
pyplot.plot(x, norm.pdf(x), 'k-')
pyplot.show()

# These parameters can be adjusted through loc and scale; one way is to pass them when calling the function:
x = linspace(-3, 3, 50)
p = pyplot.plot(x, norm.pdf(x, loc=0, scale=1))
p = pyplot.plot(x, norm.pdf(x, loc=0.5, scale=2))
p = pyplot.plot(x, norm.pdf(x, loc=-0.5, scale=.5))
pyplot.show()

# Log-normal distributions with different parameters:
from scipy.stats import lognorm
x = linspace(0.01, 3, 100)
pyplot.plot(x, lognorm.pdf(x, 1), label='s=1')
pyplot.plot(x, lognorm.pdf(x, 2), label='s=2')
pyplot.plot(x, lognorm.pdf(x, .1), label='s=0.1')
pyplot.legend()
pyplot.show()

# Discrete distributions
from scipy.stats import randint

# Probability mass function (PMF) of the discrete uniform distribution:
high = 10
low = -10

x = arange(low, high + 1, 0.5)
p = pyplot.stem(x, randint(low, high).pmf(x))  # stem plot
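# A small check (standalone) of what the shape parameter s above means: for loc=0 and
# scale=1, lognorm.pdf(x, s) equals norm.pdf(log(x), scale=s) / x.
import numpy as np
from scipy.stats import lognorm, norm

x = np.linspace(0.1, 3, 30)
for s in (1, 2, 0.1):
    assert np.allclose(lognorm.pdf(x, s), norm.pdf(np.log(x), scale=s) / x)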
ax[0].set_title('Histogram of Pickups - Normal Scale')
# create a vector to hold num of pickups
v = dftaxi.num_pickups
# plot the histogram with 30 bins, excluding values more than 3 standard deviations from the median
v[~((v - v.median()).abs() > 3 * v.std())].hist(bins=30, ax=ax[1])
ax[1].set_xlabel('Num of pickups')
ax[1].set_ylabel('Count')
ax[1].set_title('Histogram of Num of pickups - Scaled')

# apply a lognormal fit, using the mean of the pickup counts as the initial scale parameter;
# lognorm.fit returns (shape, loc, scale)
shape, loc, scale = lognorm.fit(dftaxi.num_pickups.values,
                                scale=dftaxi.num_pickups.mean(),
                                loc=0)
pdf_fitted = lognorm.pdf(np.arange(0, 12, .1), shape, loc, scale)
ax[1].plot(np.arange(0, 12, .1), 600000 * pdf_fitted, 'r')
ax[1].legend(['data', 'lognormal fit'])
plt.show()

# Pickup density
pickup_xaxis = dftaxi.long
pickup_yaxis = dftaxi.lat

sns.set_style('white')
fig, ax = plt.subplots(figsize=(11, 12))
ax.set_facecolor('black')
ax.scatter(pickup_xaxis, pickup_yaxis, s=7, color='lightpink', alpha=0.7)
ax.set_xlim([-74.03, -73.90])
ax.set_ylim([40.63, 40.85])
ax.set_xlabel("Longitude", fontsize=12)
rnd_a += np.random.normal(0, noise_size, 1000)
rnd_b += np.random.normal(0, noise_size, 1000)
rnd_L += np.random.normal(0, noise_size, 1000)

cntr_a = a_pchip(cmpos)
cntr_b = b_pchip(cmpos)
cntr_L = L_pchip(cmpos)

delta_a = cntr_a - rnd_a
delta_b = cntr_b - rnd_b
delta_L = cntr_L - rnd_L
delta_E = sqrt(square(delta_a) + square(delta_b) + square(delta_L))

# plot them green
rndplt = ax.scatter3D(rnd_a, rnd_b, rnd_L, marker='*', c='green', s=50, linewidth=1)

# histogram of delta E with a fitted lognormal curve
plt.figure()
n, bins, patches = plt.hist(delta_E, bins=50, color='blue', density=True, histtype='bar')
lnrm_shape, lnrm_loc, lnrm_scale = lognorm.fit(delta_E)
x = np.linspace(0, delta_E.max(), num=400)
y = lognorm.pdf(x, lnrm_shape, loc=lnrm_loc, scale=lnrm_scale)
pdflne = plt.plot(x, y, 'r--', linewidth=2)
def PDFm(data, nPoint, dist='normal', mu=0, sigma=1, analitica=False, lim=None): import numpy as np from scipy.interpolate import interp1d from scipy.stats import norm, lognorm eps = 5e-5 if not analitica: yest, xest = np.histogram(data, bins='fd', density=True) xest = np.mean(np.array([xest[:-1], xest[1:]]), 0) M = np.where(yest == max(yest))[0][0] m = np.where(yest == min(yest))[0][0] if M: interpL = interp1d(yest[:M + 1], xest[:M + 1], fill_value='extrapolate') interpH = interp1d(yest[M:], xest[M:]) y1 = np.linspace(yest[m] + eps, yest[M], nPoint // 2 + 1) x1 = interpL(y1) y2 = np.flip(y1, 0) x2 = interpH(y2) x = np.concatenate([x1[:-1], x2]) y = np.concatenate([y1[:-1], y2]) else: interp = interp1d(yest, xest, fill_value='extrapolate') if not nPoint % 2: nPoint = nPoint + 1 y = np.linspace(yest[M], yest[m], nPoint) x = interp(y) else: inf, sup = lim[0], lim[1] if dist == 'normal': #inf, sup = norm.interval(0.9999, loc = mu, scale = sigma) X1 = np.linspace(inf, mu, int(1e6)) Y1 = norm.pdf(X1, loc=mu, scale=sigma) interp = interp1d(Y1, X1) y1 = np.linspace(Y1[0], Y1[-1], nPoint // 2 + 1) x1 = interp(y1) X2 = np.linspace(mu, sup, int(1e6)) Y2 = norm.pdf(X2, loc=mu, scale=sigma) interp = interp1d(Y2, X2) y2 = np.linspace(Y2[0], Y2[-1], nPoint // 2 + 1) #y2 = np.flip(y1,0) x2 = interp(y2) elif dist == 'lognormal': mode = np.exp(mu - sigma**2) X1 = np.linspace(inf, mode, int(1e6)) Y1 = lognorm.pdf(X1, sigma, loc=0, scale=np.exp(mu)) interp = interp1d(Y1, X1) y1 = np.linspace(Y1[0], Y1[-1], nPoint // 2 + 1) x1 = interp(y1) X2 = np.linspace(mode, sup, int(1e6)) Y2 = lognorm.pdf(X2, sigma, loc=0, scale=np.exp(mu)) interp = interp1d(Y2, X2) y2 = np.linspace(Y2[0], Y2[-1], nPoint // 2 + 1) #y2 = np.flip(y1,0) x2 = interp(y2) x = np.concatenate([x1[:-1], x2]) return x
def distance_metric(self, statistic='all', verbose=False,
                    plot_kwargs1={'color': 'b', 'marker': 'D', 'label': '1'},
                    plot_kwargs2={'color': 'g', 'marker': 'o', 'label': '2'},
                    save_name=None):
    '''
    Calculate the distance.

    *NOTE:* The data are standardized before comparing to ensure the
    distance is calculated on the same scales.

    Parameters
    ----------
    statistic : 'all', 'hellinger', 'ks', 'lognormal'
        Which measure of distance to use.
    labels : tuple, optional
        Sets the labels in the output plot.
    verbose : bool, optional
        Enables plotting.
    plot_kwargs1 : dict, optional
        Pass kwargs to `~matplotlib.pyplot.plot` for `dataset1`.
    plot_kwargs2 : dict, optional
        Pass kwargs to `~matplotlib.pyplot.plot` for `dataset2`.
    save_name : str, optional
        Save the figure when a file name is given.
    '''
    if statistic == 'all':
        self.compute_hellinger_distance()
        self.compute_ks_distance()
        # self.compute_ad_distance()
        if self._do_fit:
            self.compute_lognormal_distance()
    elif statistic == 'hellinger':
        self.compute_hellinger_distance()
    elif statistic == 'ks':
        self.compute_ks_distance()
    elif statistic == 'lognormal':
        if not self._do_fit:
            raise Exception("Fitting must be enabled to compute the"
                            " lognormal distance.")
        self.compute_lognormal_distance()
    # elif statistic == 'ad':
    #     self.compute_ad_distance()
    else:
        raise TypeError("statistic must be 'all',"
                        "'hellinger', 'ks', or 'lognormal'.")
                        # "'hellinger', 'ks' or 'ad'.")

    if verbose:
        import matplotlib.pyplot as plt

        defaults1 = {'color': 'b', 'marker': 'D', 'label': '1'}
        defaults2 = {'color': 'g', 'marker': 'o', 'label': '2'}
        for key in defaults1:
            if key not in plot_kwargs1:
                plot_kwargs1[key] = defaults1[key]
        for key in defaults2:
            if key not in plot_kwargs2:
                plot_kwargs2[key] = defaults2[key]

        if self.normalization_type == "standardize":
            xlabel = r"z-score"
        elif self.normalization_type == "center":
            xlabel = r"$I - \bar{I}$"
        elif self.normalization_type == "normalize_by_mean":
            xlabel = r"$I/\bar{I}$"
        else:
            xlabel = r"Intensity"

        # Print fit summaries if using fitting
        if self._do_fit:
            try:
                print(self.PDF1._mle_fit.summary())
            except ValueError:
                warn("Covariance calculation failed. Check the fit quality"
                     " for data set 1!")
            try:
                print(self.PDF2._mle_fit.summary())
            except ValueError:
                warn("Covariance calculation failed. Check the fit quality"
                     " for data set 2!")

        # PDF
        plt.subplot(121)
        plt.semilogy(self.bin_centers, self.PDF1.pdf,
                     color=plot_kwargs1['color'], linestyle='none',
                     marker=plot_kwargs1['marker'],
                     label=plot_kwargs1['label'])
        plt.semilogy(self.bin_centers, self.PDF2.pdf,
                     color=plot_kwargs2['color'], linestyle='none',
                     marker=plot_kwargs2['marker'],
                     label=plot_kwargs2['label'])
        if self._do_fit:
            # Plot the fitted model.
vals = np.linspace(self.bin_centers[0], self.bin_centers[-1], 1000) fit_params1 = self.PDF1.model_params plt.semilogy(vals, lognorm.pdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-') fit_params2 = self.PDF2.model_params plt.semilogy(vals, lognorm.pdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-') plt.grid(True) plt.xlabel(xlabel) plt.ylabel("PDF") plt.legend(frameon=True) # ECDF ax2 = plt.subplot(122) ax2.yaxis.tick_right() ax2.yaxis.set_label_position("right") if self.normalization_type is not None: ax2.plot(self.bin_centers, self.PDF1.ecdf, color=plot_kwargs1['color'], linestyle='-', marker=plot_kwargs1['marker'], label=plot_kwargs1['label']) ax2.plot(self.bin_centers, self.PDF2.ecdf, color=plot_kwargs2['color'], linestyle='-', marker=plot_kwargs2['marker'], label=plot_kwargs2['label']) if self._do_fit: ax2.plot(vals, lognorm.cdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-',) ax2.plot(vals, lognorm.cdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-',) else: ax2.semilogx(self.bin_centers, self.PDF1.ecdf, color=plot_kwargs1['color'], linestyle='-', marker=plot_kwargs1['marker'], label=plot_kwargs1['label']) ax2.semilogx(self.bin_centers, self.PDF2.ecdf, color=plot_kwargs2['color'], linestyle='-', marker=plot_kwargs2['marker'], label=plot_kwargs2['label']) if self._do_fit: ax2.semilogx(vals, lognorm.cdf(vals, *fit_params1[:-1], scale=fit_params1[-1], loc=0), color=plot_kwargs1['color'], linestyle='-',) ax2.semilogx(vals, lognorm.cdf(vals, *fit_params2[:-1], scale=fit_params2[-1], loc=0), color=plot_kwargs2['color'], linestyle='-',) plt.grid(True) plt.xlabel(xlabel) plt.ylabel("ECDF") plt.tight_layout() if save_name is not None: plt.savefig(save_name) plt.close() else: plt.show() return self
from scipy.stats import lognorm print(lognorm.pdf(1,0.5**2,0,1))
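# The value printed above has a simple closed form: at x equal to the scale parameter,
# log(x/scale) = 0, so the density reduces to 1 / (s * x * sqrt(2*pi)); with
# s = 0.5**2 = 0.25 and x = 1 that is roughly 1.596.
import math
assert math.isclose(lognorm.pdf(1, 0.5**2, 0, 1), 1 / (0.25 * math.sqrt(2 * math.pi)))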