def get_lognormal_effluent():
    df = pd.read_excel('./model_WWT/SSD_effluent.xlsx', parse_dates=['Date'], index_col='Date')
    df = df.dropna()
    df = df[(df.iloc[:, 0] < 115)]
    mask = (df != 0).any(axis=1)
    df = df.loc[mask]
    df_min, df_max = min(df.iloc[:, 0]), max(df.iloc[:, 0])
    df['month'] = df.index.month
    # df['year'] = df.index.year
    # eff_np = np.zeros((12, 2020-1990))
    # for i in range(2020-1990):
    #     for j in range(12):
    #         eff_np[j, i] = df[(df.year == 1990+i) & (df.month == 1+j)].iloc[:, 0].mean()
    sigma = []
    mu = []
    for i in range(12):
        data = df[(df.month == i + 1)].iloc[:, 0]
        parm = lognorm.fit(data, floc=0)
        sigma.append(parm[0])
        mu.append(np.log(parm[2]))
    # mean = np.exp(mu + 1/2*(sigma**2))
    # mean_data = data.mean()
    # median = np.exp(mu)
    # cv = np.sqrt(np.exp(sigma**2) - 1)
    # sd = mean*np.sqrt(np.exp(sigma**2) - 1)
    data = df.iloc[:, 0]
    parm = lognorm.fit(data, floc=0)
    sigma.append(parm[0])
    mu.append(np.log(parm[2]))
    return mu, sigma
class TestQDM(NumpyTestCase.NumpyTestCase):
    badinput = 0.5
    nanarray = np.array([1, 2, 3, 4, np.nan])
    obsdist = lognorm.rvs(0.57, size=100)
    obsp = lognorm.fit(obsdist)
    refdist = lognorm.rvs(0.45, size=100)
    refp = lognorm.fit(refdist)
    futdist = lognorm.rvs(0.55, size=100)
    futp = lognorm.fit(futdist)
    x = np.linspace(0, 1, 101)
    qobs = np.quantile(obsdist, x)
    qref = np.quantile(refdist, x)
    qfut = np.quantile(futdist, x)

    def testQDMInput(self):
        """Test input is array-like"""
        self.assertRaises(TypeError, qdm, 0.5, 0.5, 0.5)

    def testQDMNanInput(self):
        """Test input array has no nan values"""
        self.assertRaises(ValueError, qdm, self.nanarray, self.nanarray, self.nanarray)

    def testRefInput(self):
        """Test using reference data as future returns obs dist params"""
        testqfut = qdm(self.obsdist, self.refdist, self.refdist)
        testp = lognorm.fit(testqfut)
        self.assertAlmostEqual(self.obsp[0], testp[0], places=2)
        self.assertAlmostEqual(self.obsp[1], testp[1], places=2)
        self.assertAlmostEqual(self.obsp[2], testp[2], places=2)
def dataHistogram(self):
    # Plot the histogram of the number of pickups
    plt.hist(self.dftaxi.num_pickups, normed=True, bins=5)
    plt.ylabel('Frequency')
    plt.title("Unscaled - Number of Pickups")
    plt.show()

    # define the figure with 2 subplots
    fig, ax = plt.subplots(1, 2, figsize=(15, 4))
    print("the unscaled graph is not representative and hence we go for scaling")
    # if data is skewed, negative binomial will perform better than poisson

    # histogram of the number of pickups
    self.dftaxi.num_pickups.hist(bins=30, ax=ax[0])
    ax[0].set_xlabel('Num of Pickups')
    ax[0].set_ylabel('Count')
    ax[0].set_yscale('log')
    ax[0].set_title('Histogram of Pickups - Normal Scale')

    # create a vector to hold num of pickups
    v = self.dftaxi.num_pickups

    # plot the histogram with 30 bins, dropping values more than 3 standard deviations from the median
    v[~((v - v.median()).abs() > 3 * v.std())].hist(bins=30, ax=ax[1])
    ax[1].set_xlabel('Num of pickups')
    ax[1].set_ylabel('Count')
    ax[1].set_title('Histogram of Num of pickups - Scaled')
    print("A scaled graph is being plotted instead...!")
    print("\n")

    # apply a lognormal fit, using the mean number of pickups as the scale parameter
    scatter, loc, mean = lognorm.fit(self.dftaxi.num_pickups.values,
                                     scale=self.dftaxi.num_pickups.mean(), loc=0)
    pdf_fitted = lognorm.pdf(np.arange(0, 12, .1), scatter, loc, mean)
    ax[1].plot(np.arange(0, 12, .1), 600000 * pdf_fitted, 'r')
    ax[1].legend(['data', 'lognormal fit'])
    plt.show()
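# The snippet above rescales the fitted density to the count axis with a hard-coded
# factor of 600000. The general relationship is expected counts ~= pdf * sample size *
# bin width; a minimal sketch with made-up data standing in for dftaxi.num_pickups:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import lognorm

rng = np.random.default_rng(42)
num_pickups = rng.lognormal(mean=1.0, sigma=0.6, size=50_000)  # hypothetical data

shape, loc, scale = lognorm.fit(num_pickups, floc=0)

fig, ax = plt.subplots()
counts, bin_edges, _ = ax.hist(num_pickups, bins=30, label='data')

# scale the pdf to the count axis: counts ~= pdf * N * bin width
bin_width = bin_edges[1] - bin_edges[0]
xs = np.linspace(bin_edges[0], bin_edges[-1], 200)
ax.plot(xs, lognorm.pdf(xs, shape, loc, scale) * num_pickups.size * bin_width, 'r',
        label='lognormal fit')
ax.legend()
plt.show()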
def test_fa(self):
    T = 10
    q = generic.fa(self.da, T, 'lognorm')
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    q0 = lognorm.ppf(1 - 1. / T, *p0)
    np.testing.assert_array_equal(q[0, 0, 0], q0)
def logdistplot(d, **kwargs):
    import seaborn as sns
    from scipy.stats import lognorm
    ax = sns.distplot(d, fit=lognorm, **kwargs)
    shape, loc, scale = lognorm.fit(d)
    ax.set_title('Fit mode: {}'.format(loc + np.exp(np.log(scale) - shape**2)))
    plt.show()
def returnDistData(cls, self):
    gammaParam = gamma.fit(10**(self.data / 10))
    gammaDist = gamma.pdf(self.data, *gammaParam)

    rayleighParam = rayleigh.fit(self.data)
    rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

    normParam = norm.fit(self.data)
    normDist = norm.pdf(self.data, *normParam)

    logNormParam = lognorm.fit(self.data)
    lognormDist = lognorm.pdf(self.data, *logNormParam)

    nakagamiParam = nakagami.fit(self.data)
    nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

    exponParam = expon.fit(self.data)
    exponDist = expon.pdf(self.data, *exponParam)

    exponweibParam = exponweib.fit(self.data)
    weibDist = exponweib.pdf(self.data, *exponweibParam)

    distDF = pd.DataFrame(
        np.column_stack([gammaDist, rayleighDist, normDist, lognormDist,
                         nakagamiDist, exponDist, weibDist]),
        columns=['gammaDist', 'rayleighDist', 'normDist', 'lognormDist',
                 'nakagamiDist', 'exponDist', 'weibDist'])
    self.distDF = distDF
def distfit(n, dists, title, width, height, fwhm, dm, samples=1000):
    from scipy.stats import lognorm
    bins_h = int(height * 60. / 8.)
    bins_w = int(width * 60. / 8.)
    sig = ((bins_w / width) * fwhm) / 2.355
    valsLP = []
    for i in range(samples):
        random_ra = width * np.random.random_sample((n,))
        random_dec = height * np.random.random_sample((n,))
        random_xy = zip(random_ra, random_dec)
        grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra,
                                                    bins=[bins_h, bins_w],
                                                    range=[[0, height], [0, width]])
        hist_points_r = zip(xedges_r, yedges_r)
        grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        S_r = np.array(grid_gaus_r * 0)
        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r - grid_mean_r) / grid_sigma_r
        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(), grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])
    x = np.linspace(2, 22, 4000)
    bins, edges = np.histogram(valsLP, bins=400, range=[2, 22], normed=True)
    centers = (edges[:-1] + edges[1:]) / 2.
    al, loc, beta = lognorm.fit(valsLP)
    pct = 100.0 * lognorm.cdf(dists, al, loc=loc, scale=beta)
    print('Significance of detection:', '{0:6.3f}%'.format(pct))
def test_fa(self):
    T = 10
    q = generic.fa(self.da, T, "lognorm")
    assert "return_period" in q.coords
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    q0 = lognorm.ppf(1 - 1.0 / T, *p0)
    np.testing.assert_array_equal(q[0, 0, 0], q0)
def logNormal(dset, a, b, c, d, e, f, g):
    global shape, loc, scale, distro, pdf
    try:
        plt.xscale(f)
    except ValueError:
        print("Specify the type of scale for the x axis")
        return
    if g == "linspace":
        distro = np.linspace(a, b, c)
    elif g == "logspace":
        distro = np.logspace(a, b, c)
        for x in range(0, len(distro)):
            # distro[x] = distro[x] + 40
            # adding 40 really does not matter for the overall distribution,
            # it literally just shifts it right
            print("")
    else:
        print("That didn't work.")
        return
    shape, loc, scale = lognorm.fit(dset)
    pdf = lognorm.pdf(distro, shape, loc, scale)
    plt.plot(distro, pdf, color=e)  # formerly ax.plot
    plt.title(d + " PDF with data")
def response_time_dist(filename, column):
    """
    Returns the lognormal distribution fit of travel times.
    """
    dt = pandas.read_csv(filename)
    response = lognorm.fit(dt[column])
    click.echo(response)
    return response
def fit(self, data, s=1, loc=1, scale=1):
    (s, loc, scale) = lognorm.fit(data)
    self.s, self.loc, self.scale = (s, loc, scale)
    self.mu = np.log(self.scale)
    self.sigma = self.s
    self.params = {'s': self.s, 'loc': self.loc, 'scale': self.scale}
    self.setParams(self.params)
    return (self.s, self.loc, self.scale)
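# The fit() wrapper above records mu = log(scale) and sigma = s. With loc at 0, that is
# the standard correspondence between scipy's (shape, loc, scale) parameterisation and
# the (mu, sigma) of the underlying normal. A minimal, self-contained sketch of the
# mapping on synthetic data (not part of the original class):
import numpy as np
from scipy.stats import lognorm

rng = np.random.default_rng(0)
mu_true, sigma_true = 1.5, 0.4
sample = rng.lognormal(mean=mu_true, sigma=sigma_true, size=10_000)

shape, loc, scale = lognorm.fit(sample, floc=0)   # loc fixed at 0
mu_hat, sigma_hat = np.log(scale), shape
print(mu_hat, sigma_hat)                          # should be close to (1.5, 0.4)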
def fn_photonflux_hist(file_name, folder, mean_photons_per_sec):
    """
    Plots a histogram of the mean photon flux per molecule and fits it to a
    lognormal distribution.
    Inputs: data, filename and foldername, which should be defined in the script.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import lognorm
    from pylab import text

    n_molecules = len(mean_photons_per_sec)

    # Plot photon flux
    figure_name = file_name + '_photonsPerSecond'
    ax = plt.subplot(111)
    num_bins = np.linspace(int(min(mean_photons_per_sec)),
                           int(max(mean_photons_per_sec)),
                           int(np.sqrt(len(mean_photons_per_sec)) * 4))
    ax.hist(mean_photons_per_sec, bins=num_bins, density=True,
            color='darkorange', edgecolor='black')

    # Fit curve
    sigma, loc, mean = lognorm.fit(mean_photons_per_sec, floc=0)
    pdf = lognorm.pdf(num_bins, sigma, loc, mean)  # sigma=shape, mu=np.log(scale)
    ax.plot(num_bins, pdf, 'k', linestyle='--')

    # Edit plot
    plt.xlabel('Photon flux ($s^{-1}$)', fontname='Arial', fontsize=12)
    plt.ylabel('Probability density', fontname='Arial', fontsize=12)
    plt.xticks(fontname='Arial', fontsize=12)
    plt.yticks(fontname='Arial', fontsize=12)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    text(0.75, 0.95, 'μ=' + str(round(mean, 2)) + ' photons $s^{-1}$',
         horizontalalignment='center', verticalalignment='center',
         transform=ax.transAxes, fontname='Arial', fontsize=12)
    text(0.40, 0.95, 'N=' + str(n_molecules),
         horizontalalignment='center', verticalalignment='center',
         transform=ax.transAxes, fontname='Arial', fontsize=12)
    plt.savefig(folder + '/Figures/PDFs' + '/' + figure_name + '.pdf', dpi=500)
    plt.savefig(folder + '/Figures/PNGs' + '/' + figure_name + '.png', dpi=500)
    return (plt.show())
def createHisto(histoData1, histoData2, imageName1, imageName2):
    """ Creates a diagram showing two histograms. """
    fig = plt.figure()
    plt.subplot(111)
    data1 = histoData1['data']
    data2 = histoData2['data']
    n, bins, patches = plt.hist(data1, _NUMBER_OF_HISTO_BARS, range=(0, data1.max()), normed=0,
                                weights=np.zeros_like(data1) + 1. / data1.size,
                                facecolor=_COLOR_FIRST_DATA[0], alpha=0.4, label=imageName1)
    n2, bins2, patches = plt.hist(data2, _NUMBER_OF_HISTO_BARS, range=(0, data2.max()), normed=0,
                                  weights=np.zeros_like(data2) + 1. / data2.size,
                                  facecolor=_COLOR_SECOND_DATA[0], alpha=0.4, label=imageName2)

    # 'best fit' line
    shape, loc, scale = lognorm.fit(data1, floc=0)  # Fit a curve to the variates
    maximum = data1.max() if data1.max() > data2.max() else data2.max()
    x = np.linspace(0, 1.2 * maximum, num=500)

    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)

    shape2, loc2, scale2 = lognorm.fit(data2, floc=0)  # Fit a curve to the variates

    # scaling
    binlength2 = bins2[1] - bins2[0]
    alpha2 = factorize(n2, binlength2)

    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
    # plt.plot(bins[1:], n, 'b^', alpha=0.5)
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), _COLOR_FIRST_DATA[1] + '--')
    # plt.plot(bins2[1:], n2, 'g^', alpha=0.5)
    plt.plot(x, alpha2 * (lognorm.pdf(x, shape2, loc=0, scale=scale2)), _COLOR_SECOND_DATA[1] + '--')

    axe = plt.axis()
    newaxe = (axe[0], 1.2 * maximum, axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(histoData1['title'])
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(histoData1['xlabel'])
    simplefilter("default", RuntimeWarning)

    # position the legend
    plt.legend(loc=0, frameon=0)
    plt.minorticks_on()
    return fig
def __init__(self, mode=0, elem=None, sample=None):
    if mode == 0:
        self.s = elem[0]
        self.mu = elem[1]
        self.sigma = elem[2]
    else:
        self.s, self.mu, self.sigma = lognorm.fit(sample)
    self.math_average = lognorm.mean(self.s, loc=self.mu, scale=self.sigma)
    self.dispersion = lognorm.var(self.s, loc=self.mu, scale=self.sigma)
def lognormal(x, y):
    s, loc, scale = lognorm.fit(x)
    xmin = x.min()
    xmax = x.max()
    x = np.linspace(xmin, xmax, len(x))
    pdf = lognorm.pdf(x, s, scale=scale)
    yres = pdf
    print("Sum of squared difference (log-normal): ", np.sum((y - yres)**2))
    return yres
def _boots(self, df, newx, shape, scale, dist=lognorm):
    xr = lognorm.rvs(size=len(df['Prediction']), s=shape, loc=0, scale=scale)
    this_shape, this_loc, this_scale = lognorm.fit(xr, floc=0)
    this_fit = dist.cdf(newx, s=this_shape, loc=0, scale=this_scale)
    return list(this_fit)
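# _boots() above draws one parametric-bootstrap sample from the fitted lognormal,
# refits it, and evaluates the CDF on newx. Repeating that gives pointwise confidence
# bands for the CDF; a minimal sketch with hypothetical parameters (the DataFrame and
# the original fit are not reproduced):
import numpy as np
from scipy.stats import lognorm

shape, scale = 0.5, 10.0                 # assumed fitted parameters
n_obs = 200                              # stands in for len(df['Prediction'])
newx = np.linspace(0.1, 50.0, 100)

boots = []
for _ in range(500):
    xr = lognorm.rvs(size=n_obs, s=shape, loc=0, scale=scale)
    s_hat, _, scale_hat = lognorm.fit(xr, floc=0)
    boots.append(lognorm.cdf(newx, s=s_hat, loc=0, scale=scale_hat))

lower, upper = np.percentile(boots, [2.5, 97.5], axis=0)   # pointwise 95% band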
def plot_gh_distribution(N=100, dp=2.0, sds=(1, 2, 3)):
    fig, ax = plt.subplots(figsize=[2 * plotdl.latex_width_inch, plotdl.latex_height_inch])
    cbandwidth = 20  # N # 20
    tga = np.zeros([len(sds), cbandwidth])
    gg = np.zeros([len(sds)])
    for n, (sd, tg) in enumerate(zip(sds, tga)):
        m = models.Model_Anderson_DD_1d(number_of_points=N, dis_param=dp, periodic=False,
                                        bandwidth=1, prng=np.random.RandomState(sd))
        ## TAKE ONLY CENTER OF BAND:
        crange = np.arange(N // 2 - cbandwidth // 2, N // 2 + cbandwidth // 2)
        # crange = (abs(m.eig_vals).copy().argsort())[:4]
        # debug((crange, np.arange(N//2 - cbandwidth//2, N//2 + cbandwidth//2)))
        ga = N * phys_functions.ga(m.eig_matrix[:, [crange]])
        tg[:] = ga
        gg[n] = abs(phys_functions.A_matrix_inv(m.rate_matrix, 1, 1.57))**2
        # p = s_cummulative_plot(ax, ga)
    mi, ma, theo = phys_functions.ga_cdf(lyap_gamma(1, dp), N)
    ghs = np.logspace(np.log10(mi), np.log10(ma) + 1, 1000)
    logg = logavg(tga, axis=0)
    s_cummulative_plot(ax, logg)
    debug((logg.shape, tga.shape))
    p = s_cummulative_plot(ax, tga)
    sh, lo, sc = lognorm.fit(tga.flat)
    mu = np.log(sc)
    stdv = sh
    debug((sh, lo, sc))
    # lg = lognorm()
    xspace = np.logspace(np.log10(tga.min()), np.log10(tga.max()))
    ax.plot(xspace, norm.cdf(np.log(xspace), [stdv], loc=mu, scale=stdv), '-.', color='cyan')
    ax.plot(ghs, theo(ghs), color='black')
    # return ghs, theo
    # tga_sum = np.nansum(tga, axis=1)
    tga_avg = np.average(tga, axis=1)
    ax.axvline(logavg(gg))
    ax.axvline(np.average(gg), ls='--')
    ax.axvline(logavg(tga_avg), ls='-', color='red')
    ax.axvline(np.average(tga), ls='-', color='magenta')
    ax.axvline(logavg(tga[tga > 1e-100]), ls=':', color='red')
    ax.axvline(np.exp(-2 * N * lyap_gamma(1, dp)), ls='--', color='green')
    # ax.axvline(4*(lyap_gamma(1,dp)**2)*N*np.exp(-2*N*lyap_gamma(1,dp)), ls='--', color='green')
    print(2 * N * lyap_gamma(1, dp))
    ax.set_xscale('log')
    # ax.set_xlim(1e-10, 10)
    ax.set_ylim(1e-20, 1)
    ax.set_title("W = {}, $\gamma$ = {:.2}, N = {}".format(dp, lyap_gamma(1, dp), N))
    ax.xaxis.set_major_locator(LogLocator(numdecs=10))
    mkdir_and_savefig(fig, 'plots/pta_gh_dist.png')
def lognorm_fit(x, xdata=None):
    '''
    Fit a log normal distribution to the data.
    Uses actual input data and is slower as a result.
    '''
    if xdata is None:
        xdata = x
    shape, loc, scale = lognorm.fit(xdata, loc=1)
    pdf = lognorm.pdf(x, shape, loc, scale)
    return pdf, np.log(scale), shape
def plot_hist(indiv, datas):
    x = np.array(datas)
    unique, counts = np.unique(x, return_counts=True)
    shape, loc, scale = lognorm.fit(x, floc=0)
    x2 = np.linspace(min(unique), max(unique))
    p = lognorm.pdf(x2, shape, loc=loc, scale=scale)
    plt.clf()
    plt.hist(x, normed=True, bins=50)
    plt.plot(x2, p, 'k')
    plt.savefig("hist_{}.pdf".format(indiv))
def plotIntensityHistogram(ax, sources, freq_mhz, num_bins, label, color="#338768", fit=False):
    intensity_attrib = intensityAttrib(freq_mhz)

    # if the frequency has not been recorded, then no such column will exist, so return
    try:
        peak_intensity = sources[intensity_attrib]
    except:
        print("Intensities for frequency " + str(freq_mhz) + "MHz have not been recorded")
        return

    # filtering out negative intensities
    peak_intensity = peak_intensity[peak_intensity > 0]

    # creating the log-scaled bins
    bin_max = np.log10(peak_intensity.max())
    bin_min = np.log10(peak_intensity.min())
    bins = 10**np.linspace(bin_min, bin_max, num_bins)

    # set axis label and x scale
    ax.set_xlabel(createLabel(intensity_attrib))
    ax.set_ylabel("Frequency")
    ax.set_xscale("log")

    counts, bin_edges, ignored = ax.hist(peak_intensity, bins=bins, rwidth=1.0, color=color,
                                         label=str(freq_mhz) + "MHz " + label)

    # draw best fit logarithmic Gaussian curve
    if fit:
        shape, loc, scale = lognorm.fit(peak_intensity, floc=0)
        bins_log_len = np.r_[bin_edges[1:] - bin_edges[:-1], 0]
        # get pdf-values for same intervals as histogram
        samples_fit_log = lognorm.pdf(bins, shape, loc=loc, scale=scale)
        # plot the fit line
        ax.plot(bins, samples_fit_log * bins_log_len * counts.sum(), 'k-',
                label="Fit line " + label, linewidth=2)
        # display mean and std dev in a textbox
        mean = round(scale, 2)
        std = round(np.log10(shape), 2)
        ax.legend((dummyObj(), dummyObj()), ("Mean = " + str(mean), "SD = " + str(std)))

    ax.legend(loc="lower left", bbox_to_anchor=(0.1, 1.01))
    print("Plotted the histogram")
def index():
    req = request.json
    # print(req)
    data = req['times']
    shape, loc, scale = lognorm.fit(data, floc=0)
    fitted = lognorm(shape, loc, scale)
    res = fitted.ppf(0.05)
    # return req
    return str(res)
def test_fit(self):
    p = generic.fit(self.da, 'lognorm')
    assert p.dims[0] == 'dparams'
    assert p.get_axis_num('dparams') == 0
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    np.testing.assert_array_equal(p[:, 0, 0], p0)

    # Check that we can reuse the parameters with scipy distributions
    cdf = lognorm.cdf(.99, *p.values)
    assert cdf.shape == (self.nx, self.ny)
def continuous():
    """Fit distributions to symptoms' duration data."""
    # fetch data
    x = _symptoms_data()
    # fit distributions
    return {
        'x': x,
        'norm': norm.fit(x),
        'lognorm': lognorm.fit(x, floc=0),
        'gamma': gamma.fit(x, floc=0)
    }
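# continuous() returns raw parameter tuples for several candidate distributions; a
# common follow-up is to compare the fits, e.g. by log-likelihood or AIC. A minimal
# sketch on synthetic data (_symptoms_data() is not reproduced, and the parameter
# count is taken at face value even where loc was fixed):
import numpy as np
from scipy.stats import norm, lognorm, gamma

rng = np.random.default_rng(7)
x = rng.gamma(shape=3.0, scale=2.5, size=500)   # stand-in for the duration data

fits = {
    'norm': (norm, norm.fit(x)),
    'lognorm': (lognorm, lognorm.fit(x, floc=0)),
    'gamma': (gamma, gamma.fit(x, floc=0)),
}

for name, (dist, params) in fits.items():
    loglik = np.sum(dist.logpdf(x, *params))
    aic = 2 * len(params) - 2 * loglik          # lower is better
    print(f'{name}: AIC = {aic:.1f}')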
def test_fit(self):
    p = generic.fit(self.da, "lognorm")
    assert p.dims[0] == "dparams"
    assert p.get_axis_num("dparams") == 0
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    np.testing.assert_array_equal(p[:, 0, 0], p0)

    # Check that we can reuse the parameters with scipy distributions
    cdf = lognorm.cdf(0.99, *p.values)
    assert cdf.shape == (self.nx, self.ny)
    assert p.attrs["estimator"] == "Maximum likelihood"
def plot_fit(tau):
    norm = 1. / tau * (tau + 2. / 3.)**2. * 3. / np.pi**2.
    print(tau)
    print(norm)

    # Load photon data, take care of float errors in r
    data = np.loadtxt(
        '../outputs/escape/escape_photons_nphot1e6/exit_photons_tau{}.dat'.format(int(tau)),
        skiprows=1)
    data[:, 0] = np.round(data[:, 0], 5)

    # Set up figure, make initial histogram, normalize x and y
    fig, ax = plt.subplots(1, 1, dpi=180)
    n, bins, patches = ax.hist(data[:, 6], bins=50, color='k', histtype='step',
                               density=True, range=(0, 15))
    bins = bins / norm
    n = n * norm

    # Calculate new bin positions, check normalization sum, clear old histogram
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    print(np.sum(n * np.diff(bins)))
    plt.cla()

    # Scatter plot of the new bin positions and normalized counts
    ax.scatter(bincenters, n, color='k', s=3)
    ax.set_xlabel('Distance')
    ax.set_ylabel('n (normalized)')
    ax.set_yscale('log')
    ax.set_title(r'Total Distance Traveled, $\tau = {}$, $n = 10^6$'.format(int(tau)))

    # Calculate probability density from Shane's series solution code
    prob = np.zeros(np.shape(bincenters))
    for i in range(len(bincenters)):
        prob[i] = prob_ct_tau(bincenters[i], tau)
    ax.plot(bincenters, prob, 'b--', label='Series Solution', alpha=0.5)

    # Fit a log normal distribution to the normalized data
    shape, loc, scale = lognorm.fit(data[:, 6] / norm, loc=1)
    pdf = lognorm.pdf(bincenters, shape, loc, scale)
    ax.plot(bincenters, pdf, 'r--', label='Log Normal', alpha=0.5)

    # Save or show the plot
    plt.legend()
    plt.savefig('../outputs/escape/fit_plots_nphot1e6/fit_tau{}.pdf'.format(int(tau)))
    plt.close()
    return np.log(scale), shape
def lognormFit(series):
    '''Estimate lognormal distribution parameters for series (pd.Series or np.array).'''
    # Relation between the lognorm parameters estimated by scipy.stats and lognormPdf:
    # with floc=0 (i.e. loc held at 0), s = sigma and scale = e ^ mu
    # mu = np.log(series).mean()
    # sigma = np.log(series).std(ddof=0)
    s, loc, scale = lognorm.fit(series, floc=0)
    sigma, mu = s, np.log(scale)
    return mu, sigma
def get_lognormal_para(name):
    name = 'Inflow'
    df = pd.read_excel('./model_WWT/SDD_N_P_2012-2019.xlsx', parse_dates=['Date'],
                       index_col='Date', sheet_name=1)
    start_date = '2012-08-01'
    end_date = '2019-8-15'
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]
    if name == 'NH3':
        data = df.iloc[:, 0]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'TP':
        data = df.iloc[:, 3]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'Inflow':
        data = df.iloc[:, 2]
        data = data.replace(0, np.nan)
        data = data.dropna()
        data = pd.DataFrame(data)
        data.iloc[:, 0].value_counts()
        data = data[data.iloc[:, 0] != 30]  # inflow data used is only sewage flow, 30 MGD needs to be removed.
        data = np.array(data)
    else:
        print('wrong inputs')

    parm = lognorm.fit(data, floc=0)  # parm[0] = sigma; parm[1] = location, 0; parm[2] = median, m
    sigma = parm[0]
    mu = np.log(parm[2])  # mu is not equal to arithmetic mean
    mean = np.exp(mu + 1 / 2 * (sigma**2))
    mean_data = data.mean()
    median = np.exp(mu)
    cv = np.sqrt(np.exp(sigma**2) - 1)
    sd = mean * np.sqrt(np.exp(sigma**2) - 1)
    return {
        'mu': mu,
        'sigma': sigma,
        'cv': cv,
        'median (scale, m)': parm[2],
        'mean (E[X])': mean,
        'mean_realdata': mean_data,
        'SD[X]': sd,
        'location': parm[1]
    }
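# The moment formulas used above, mean = exp(mu + sigma**2 / 2) and
# SD = mean * sqrt(exp(sigma**2) - 1), can be cross-checked against scipy's own
# moment functions. A small sketch with assumed parameters, independent of the
# Excel input:
import numpy as np
from scipy.stats import lognorm

sigma, mu = 0.8, 2.0                      # shape and log-scale, as in the function above
scale = np.exp(mu)

mean_formula = np.exp(mu + 0.5 * sigma**2)
sd_formula = mean_formula * np.sqrt(np.exp(sigma**2) - 1)

# scipy computes the same moments from (shape, loc=0, scale)
print(mean_formula, lognorm.mean(sigma, loc=0, scale=scale))
print(sd_formula, lognorm.std(sigma, loc=0, scale=scale))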
def calculate_story_distribution(fd_id):
    """
    Using the department in combination with similar departments, calculate the
    story distribution of structures in owned census tracts.

    Only medium and high risk structures are included in the calculations.
    """
    MAX_STORIES = 108

    try:
        fd = FireDepartment.objects.get(id=fd_id)
        cursor = connections['nfirs'].cursor()
    except (FireDepartment.DoesNotExist, ConnectionDoesNotExist):
        return

    geoms = list(fd.similar_departments.filter(owned_tracts_geom__isnull=False)
                 .values_list('owned_tracts_geom', flat=True))
    geoms.append(fd.owned_tracts_geom)

    FIND_STORY_COUNTS = """SELECT count(1), p.story_nbr
        FROM parcel_stories p
        JOIN "LUSE_swg" lu ON lu."Code" = p.land_use,
             (SELECT g.owned_tracts_geom FROM (VALUES {values}) AS g (owned_tracts_geom)) owned_tracts
        WHERE lu.include_in_floor_dist
          AND lu.risk_category = %(level)s
          AND ST_Intersects(owned_tracts.owned_tracts_geom, p.wkb_geometry)
        GROUP BY p.story_nbr
        ORDER BY count DESC, p.story_nbr;"""

    values = ','.join(['(ST_SetSRID(\'{}\'::geometry, 4326))'.format(geom.hex) for geom in geoms])
    mapping = {2: 'Medium', 4: 'High'}

    def expand(values, weights):
        ret = []
        for v in zip(values, weights):
            ret = ret + [v[0]] * v[1]
        return ret

    for nlevel, level in mapping.items():
        cursor.execute(FIND_STORY_COUNTS.format(values=values), {'level': level})
        res = cursor.fetchall()

        # Filter out `None` story counts and obnoxious values
        a = list(filter(lambda x: x[1] is not None and x[1] <= MAX_STORIES, res))
        weights = map(lambda x: x[0], a)
        vals = map(lambda x: x[1], a)

        expanded = expand(vals, weights)
        samples = np.random.choice(expanded, size=1000)
        samp = lognorm.fit(samples)  # Fit curve to story counts

        rm = fd.firedepartmentriskmodels_set.get(level=nlevel)
        rm.floor_count_coefficients = {'shape': samp[0], 'loc': samp[1], 'scale': samp[2]}
        rm.save()
def createHisto(A, title='', xlabel='', unit=''):
    """ Generates one histogram of the given data. """
    fig = plt.figure()
    ax = plt.subplot(111)
    n, bins, patches = plt.hist(A, _NUMBER_OF_HISTO_BARS, range=(0, A.max()), normed=0,
                                weights=np.zeros_like(A) + 1. / A.size,
                                facecolor='cyan', alpha=0.4, label=' ')

    # set min and max values to return
    values = {}
    values['min'] = A.min()
    values['minrf'] = n[np.nonzero(n)][0]
    values['max'] = A.max()
    values['maxrf'] = n[-1]
    numbers = title + "\nx: " + str(bins[1:]) + "\ny: " + str(n) + "\n\n"

    # 'best fit' line
    shape, loc, scale = lognorm.fit(A, floc=0)  # Fit a curve to the variates
    x = np.linspace(0, 1.2 * A.max(), num=500)

    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)

    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
    plt.plot(bins[1:], n, 'c^', alpha=0.5, label='Distribution')
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), 'c--', label='Fit')

    axe = plt.axis()
    newaxe = (axe[0], 1.2 * A.max(), axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(title)
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(xlabel)
    simplefilter("default", RuntimeWarning)

    # position the legend
    handles, labels = ax.get_legend_handles_labels()
    indexL3 = labels.index(' ')
    labelsL3 = [labels[indexL3]]
    handlesL3 = [handles[indexL3]]
    del labels[indexL3]
    del handles[indexL3]
    l1 = plt.legend(handlesL3, labelsL3, prop={'size': 12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.legend(handles, labels, prop={'size': 12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.gca().add_artist(l1)

    currentaxis = fig.gca()
    legendText = '$\mathrm{\mathsf{\mu =}}$ %4.2f ' + unit + '\n$\mathrm{\mathsf{\sigma =}}$ %4.2f ' + unit
    plt.text(0.96, 0.86, legendText % (scale, (shape * scale)), horizontalalignment='right',
             verticalalignment='top', transform=currentaxis.transAxes)
    plt.minorticks_on()
    return fig, values, numbers
def axplot(i_data, linkid, intervalindex, ax):
    ax.cla()
    ax.hist(i_data, numberofbin, inputrange, normed=1, label='original distribution')
    # start to fit
    shape, location, scale = lognorm.fit(i_data, floc=0)
    rv = lognorm(shape, location, scale)
    x = np.linspace(0, 140, 100)
    ax.plot(x, rv.pdf(x), 'r-', lw=5, alpha=0.6, label='lognorm fit')
    ax.set_title("Link id " + str(linkid) + " and time Interval: " + formatinterval(intervalindex))
    ax.set_xlabel('Travel Time (s)')
    ax.set_ylabel('Probability Density')
    ax.legend()
def FitPrice(data):
    priceData = data[:, 6]
    priceData = priceData[~sp.isnan(priceData)]
    shape, loc, scale = lognorm.fit(priceData, loc=0)
    x = np.linspace(0, 100, 100)
    p = lognorm.pdf(x, shape, loc, scale)
    maxIndex = 0
    for i in range(0, len(p)):
        if p[i] >= p[maxIndex]:
            maxIndex = i
        else:
            break  # if the plot goes down, stop searching.
    return x[maxIndex]
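# FitPrice() locates the mode of the fitted lognormal by scanning the sampled pdf.
# The mode also has a closed form, loc + scale * exp(-shape**2) (i.e. exp(mu - sigma**2)
# when loc == 0), which could replace the grid search. A minimal sketch on made-up
# price data:
import numpy as np
from scipy.stats import lognorm

rng = np.random.default_rng(1)
priceData = rng.lognormal(mean=3.0, sigma=0.5, size=5_000)   # hypothetical prices

shape, loc, scale = lognorm.fit(priceData, floc=0)

mode_closed_form = loc + scale * np.exp(-shape**2)

# grid-search estimate, like FitPrice does
x = np.linspace(0.01, priceData.max(), 1000)
mode_grid = x[np.argmax(lognorm.pdf(x, shape, loc, scale))]

print(mode_closed_form, mode_grid)   # the two estimates should agree closely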
def hist(x, weights=None, bins=10, distname='normal', color='b', label='pdf', filename=None):
    # create full data using weights
    z = x
    if weights is not None:
        z = np.zeros(sum(weights))
        j = 0
        for i in range(weights.size):
            for k in range(j, j + weights[i]):
                z[j] = x[i]
                j += 1

    # histogram
    hist, bins = np.histogram(x, bins=bins, density=True, weights=weights)

    # fit distribution
    if distname == 'normal':
        (mu, sigma) = norm.fit(z)
        pdf = lambda x: norm.pdf(x, mu, sigma)
    elif distname == 'lognormal':
        sigma, loc, scale = lognorm.fit(z, floc=0)
        mu = np.log(scale)
        pdf = lambda x: lognorm.pdf(x, sigma, loc, scale=scale)
    elif distname is not None:
        raise Exception('Unsupported distribution name ' + distname)

    # plot distribution
    if distname is not None:
        x = np.linspace(bins[0], bins[-1], 100)
        y = pdf(x)
        label = 'm=%2.1f, s=%2.1f [%s]' % (mu, sigma, label)
        plt.plot(x, y, linewidth=3, label=label, alpha=0.7, color=color)

    # plot histogram
    c = (bins[:-1] + bins[1:]) / 2  # bins centers
    plt.plot(c, hist, marker='s', alpha=0.7, markersize=8, linestyle='None', color=color)

    # format plot
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.ylabel('PDF', fontsize=16)
    if filename is not None:
        print('Saving figure ' + filename)
        plt.savefig(filename, bbox_inches='tight')
rnd_a += np.random.normal(0, noise_size, 1000)
rnd_b += np.random.normal(0, noise_size, 1000)
rnd_L += np.random.normal(0, noise_size, 1000)

cntr_a = a_pchip(cmpos)
cntr_b = b_pchip(cmpos)
cntr_L = L_pchip(cmpos)

delta_a = cntr_a - rnd_a
delta_b = cntr_b - rnd_b
delta_L = cntr_L - rnd_L
delta_E = sqrt(square(delta_a) + square(delta_b) + square(delta_L))

# plot them green
rndplt = ax.scatter3D(rnd_a, rnd_b, rnd_L, marker='*', c='green', s=50, linewidth=1)

# histogram of delta E
plt.figure()
n, bins, patches = plt.hist(delta_E, bins=50, color='blue', normed=True, histtype='bar')
lnrm_shape, lnrm_loc, lnrm_scale = lognorm.fit(delta_E)
x = np.linspace(0, delta_E.max(), num=400)
y = lognorm.pdf(x, lnrm_shape, loc=lnrm_loc, scale=lnrm_scale)
pdflne = plt.plot(x, y, 'r--', linewidth=2)
from scipy import stats
from scipy.stats import lognorm

rrr = lognorm.rvs(10, loc=0, scale=2, size=1000)
print(rrr[1:10])
print("log normal fit", lognorm.fit(rrr, 5, loc=0, scale=3))
rrr[1:10]

from numpy import rint
from numpy import around

ppp = around(rrr)
print(ppp[1:10])
print(lognorm.fit(ppp, 5, loc=0, scale=3))
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import lognorm
import math
from scipy.interpolate import UnivariateSpline
import sys

data = sp.genfromtxt(sys.argv[1], delimiter=",")
freq = {}
priceData = data[:, 4]
priceData = priceData[~sp.isnan(priceData)]
shape, loc, scale = lognorm.fit(priceData, loc=0)

plt.hist(priceData, bins=100, normed=True, alpha=0.6, color='g')
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = lognorm.pdf(x, shape, loc, scale)
print(p)
print(x)

maxIndex = 0
for i in range(0, len(p)):
    if p[i] >= p[maxIndex]:
        maxIndex = i
    else:
        break  # if the plot goes down, stop searching.

maxX = x[maxIndex]
plt.plot(x, p, 'k', linewidth=2)
def distfit(n, dists, title, ra, dec, fwhm, dm):
    import numpy as np
    import matplotlib.pyplot as plt
    # from scipy.optimize import curve_fit
    from scipy.stats import lognorm
    from scipy import ndimage

    # n = 279
    bins = 165
    width = 22
    # fwhm = 2.0
    sig = ((bins / width) * fwhm) / 2.355
    valsLP = []
    for i in range(25000):
        random_ra = ra * np.random.random_sample((n,))
        random_dec = dec * np.random.random_sample((n,))
        random_xy = zip(random_ra, random_dec)
        grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins, bins],
                                                    range=[[0, width], [0, width]])
        hist_points_r = zip(xedges_r, yedges_r)
        grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        S_r = np.array(grid_gaus_r * 0)
        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r - grid_mean_r) / grid_sigma_r
        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(), grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])

    # valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
    # vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)
    # bins, edges = np.histogram(vals, bins=400, range=[2,22], normed=True)
    # centers = (edges[:-1] + edges[1:])/2.
    # plt.scatter(centers, bins, edgecolors='none')

    x = np.linspace(2, 22, 4000)
    # al, loc, beta = lognorm.fit(vals)
    # print(al, loc, beta)
    # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta), 'r-', lw=5, alpha=0.6, label='lognormal AGC198606')
    # print(lognorm.cdf(dists, al, loc=loc, scale=beta))

    bins, edges = np.histogram(valsLP, bins=400, range=[2, 22], normed=True)
    centers = (edges[:-1] + edges[1:]) / 2.
    # x = np.linspace(2, 22, 4000)
    # dists = np.array([3.958, 3.685, 3.897, 3.317])
    al, loc, beta = lognorm.fit(valsLP)
    # print(al, loc, beta)
    plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta), 'r-', lw=2, alpha=0.6,
             label='lognormal distribution')
    print('Significance of detection:',
          '{0:6.3f}%'.format(100.0 * lognorm.cdf(dists, al, loc=loc, scale=beta)))
    plt.scatter(centers, bins, edgecolors='none',
                label='histogram of $\sigma$ from 25000 \nuniform random samples')
    # print(chisqg(bins, lognorm.pdf(centers, al, loc=loc, scale=beta)))

    ax = plt.subplot(111)
    plt.plot([dists, dists], [-1.0, 2.0], 'k--', lw=2, alpha=1.0, label='best ' + title + ' detection')
    # plt.plot([4.115, 4.115], [-1.0, 2.0], 'k--', lw=2, alpha=1.0, label='Leo P detection at 1.74 Mpc')
    # plt.plot([3.897, 3.897], [-1.0, 2.0], 'k-', lw=5, alpha=0.6, label='d=417 kpc')
    # plt.plot([3.317, 3.317], [-1.0, 2.0], 'k-', lw=5, alpha=0.4, label='d=427 kpc')
    plt.ylim(0, 1.1)
    plt.xlim(2, 12)
    plt.xlabel('$\sigma$ above local mean')
    plt.ylabel('$P(\sigma = X)$')
    plt.legend(loc='best', frameon=True)
    ax.set_aspect(3)
    # plt.show()
    plt.savefig(title + '_' + repr(dm) + '_' + repr(fwhm) + '_dist.pdf')
log_c = np.array([np.log(i) for i in c])
log_a = (log_a - np.mean(log_a)) / np.std(log_a)
log_b = (log_b - np.mean(log_b)) / np.std(log_b)
log_c = (log_c - np.mean(log_c)) / np.std(log_c)

print(kstest(log_a, 'norm'))
print(kstest(log_b, 'norm'))
print(kstest(log_c, 'norm'))

plb.hist(b)
plb.hist(log_b, bins=20)
plb.hist(a, bins=100)
plb.hist(log_a, bins=10)

shape, loc, scale = lognorm.fit(a)
rnd_a = lognorm.rvs(shape, scale=scale, loc=loc, size=len(a))
plb.hist(rnd_a, bins=20, alpha=0.5)
plb.hist(a, bins=20, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(c)
rnd_c = lognorm.rvs(shape, scale=scale, loc=loc, size=len(c))
plb.hist(rnd_c, bins=30, alpha=0.5)
plb.hist(c, bins=30, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(b)
rnd_b = lognorm.rvs(shape, scale=scale, loc=loc, size=len(b))
plb.hist(rnd_b, bins=20, alpha=0.5)
plb.hist(b, bins=20, color='r', alpha=0.5)

np.mean(b)
def test_z(filename, uncorr_algo, distbn_to_fit):
    '''test case for pdz domain proteins'''
    algn = read_free(filename)

    # truncate alignments to sequence positions with
    # gap frequency no greater than 20% - to avoid over-representation of gaps
    alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    print(alignments.shape)

    pdb_res_list = read_pdb(PDZ_PDB_FILE, 'A')
    msa_algn = msa_search(pdb_res_list, alignments)
    print(msa_algn)

    sca_algn = sca(alignments)
    algn_shape = get_algn_shape(algn)
    no_pos = alignments.shape[1]
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa

    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' + str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))

    spect = spectral_decomp(sca_algn, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect ldb_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation
    n_min = min(no_seq, no_pos)
    print(U.shape)
    print(V.shape)
    print(n_min)
    Pi = dot(U[:, 0:n_min], transpose(V[:, 0:n_min]))
    U_p = dot(Pi, spect.pos_ev)

    distbn = get_distbn(distbn_to_fit)
    pd = lognorm.fit(spect.pos_ev[:, 0], floc=0)  # floc=0 holds location to 0 for fitting
    print(pd)

    p_cutoff = 0.8  # cutoff for the cdf
    xhist = arange(0, 0.4, 0.01)
    x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist)) / 100)
    cdf = lognorm.cdf(x_dist, pd[0], pd[1], pd[2])  # Use case: lognorm.cdf(x, shape, loc, scale)
    jnk = min(abs(cdf - p_cutoff))
    x_dist_pos_right = np.argmin(abs(cdf - p_cutoff))
    cutoff_ev = x_dist[x_dist_pos_right]
    sector_def = np.array(np.where(spect.pos_ev[:, 0] > cutoff_ev)[0])[0]
    print('sector definition :')
    print(sector_def)
random_ra = 20.0 * np.random.random_sample((n,))
random_dec = 20.0 * np.random.random_sample((n,))
random_xy = zip(random_ra, random_dec)
grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins, bins],
                                            range=[[0, width], [0, width]])
hist_points_r = zip(xedges_r, yedges_r)
grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
S_r = np.array(grid_gaus_r * 0)
grid_mean_r = np.mean(grid_gaus_r)
grid_sigma_r = np.std(grid_gaus_r)
S_r = (grid_gaus_r - grid_mean_r) / grid_sigma_r
x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(), grid_gaus_r.shape)
sig_values_r.append(S_r[x_cent_r][y_cent_r])
# print >> f1, S_r[x_cent_r][y_cent_r]

al, loc, beta = lognorm.fit(sig_values_r)
alphas.append(al)
betas.append(beta)
locs.append(loc)
# pct_calc = [sig_values_r[i] for i in range(len(sig_values_r)) if (sig_values_r[i] < S_th)]
# percentile = (float(len(pct_calc))/1000.0)*100.0
# print n, S_th, percentile

ax0 = plt.subplot(2, 2, 1)
plt.scatter(ns, alphas, c='r', edgecolors='none')
# plt.ylim(0,1.1)
# plt.xlim(2,12)
plt.xlabel('sample size')
plt.ylabel('alpha')
ax1 = plt.subplot(2, 2, 2)