import numpy as np
from scipy import stats


def confidence_interval_std(std, num, confidence=0.95):
    """Calculate the half-width of the confidence interval of the
    standard deviation, given a sample standard deviation and a number
    of observations, assuming a normal distribution.

    Uses the fact that sqrt(num - 1) * std / sigma follows a chi
    distribution with num - 1 degrees of freedom.
    """
    c = stats.chi(num - 1).ppf(0.5 + 0.5 * confidence)
    lower_bound = np.sqrt(num - 1) * std / c
    c = stats.chi(num - 1).ppf(0.5 - 0.5 * confidence)
    upper_bound = np.sqrt(num - 1) * std / c
    return 0.5 * (upper_bound - lower_bound)
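# A minimal usage sketch of the helper above (the sample values are
# illustrative, not from the original source): half-width of the 95%
# confidence interval for the standard deviation estimated from 30
# normal observations with a sample standard deviation of 2.0.
half_width = confidence_interval_std(std=2.0, num=30, confidence=0.95)
print(half_width)  # roughly 0.55 for these inputs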
def test_crossmatch_cartesian_gaussian_distribution(
        cartesian_gaussian, contours, n_coordinates):
    """Test on a Cartesian Gaussian distribution.

    This distribution has closed-form expressions for the following
    outputs:

    * contour_vols
    * probdensity_vol
    * searched_prob_vol
    * searched_vol
    """
    skymap = cartesian_gaussian_to_skymap(
        6, cartesian_gaussian.mean, cartesian_gaussian.cov)

    if n_coordinates is None:
        coordinates = None
    else:
        coordinates_xyz = cartesian_gaussian.rvs(size=n_coordinates)
        coordinates = SkyCoord(*coordinates_xyz.T * u.Mpc,
                               representation_type=CartesianRepresentation)

    result = crossmatch(skymap, contours=contours, coordinates=coordinates)

    standard_vol = 4 / 3 * np.pi * np.sqrt(
        np.linalg.det(cartesian_gaussian.cov))
    expected = standard_vol * stats.chi(3).ppf(contours)**3
    np.testing.assert_allclose(result.contour_vols, expected, rtol=2e-3)

    if coordinates is None:
        assert np.isnan(result.probdensity_vol)
        assert np.isnan(result.searched_prob_vol)
        assert np.isnan(result.searched_vol)
    elif np.size(coordinates) == 0:
        assert np.size(result.probdensity_vol) == 0
        assert np.size(result.searched_prob_vol) == 0
        assert np.size(result.searched_vol) == 0
    else:
        expected = cartesian_gaussian.pdf(coordinates_xyz)
        np.testing.assert_allclose(result.probdensity_vol, expected,
                                   rtol=4e-2)

        # Mahalanobis radius of each sample point
        d = coordinates_xyz - cartesian_gaussian.mean
        r = np.sqrt(np.sum(
            (d @ np.linalg.inv(cartesian_gaussian.cov)) * d, axis=-1))
        expected = stats.chi(3).cdf(r)
        np.testing.assert_allclose(result.searched_prob_vol, expected,
                                   atol=1e-2)

        expected = standard_vol * r**3
        np.testing.assert_allclose(result.searched_vol, expected, rtol=6e-2)
import pickle
import time

# The 4-tuple unpacking below matches scipy.stats.chi2_contingency,
# which `chi` is assumed to alias here.
from scipy.stats import chi2_contingency as chi


def compute(matrixname, chrNum):
    with open("hash12", 'rb') as f:
        hash12 = pickle.load(f)
    with open("sums1", 'rb') as f:
        sums1 = pickle.load(f)
    with open("sums2", 'rb') as f:
        sums2 = pickle.load(f)
    p = []
    sumtime = 0  # takes longer each iteration, ask Dr. Ay
    for x in hash12:
        if x not in sums1 or x not in sums2:
            continue
        startt = time.time()
        for y in hash12[x]:
            chi2, pval, dof, exp = chi([[sums1[x], sums2[x]],
                                        [hash12[x][y][0], hash12[x][y][1]]])
            p.append(pval)
        endt = time.time()
        # print("Index\t%s\ttime\t%f\tnumberOfTests\t%d"
        #       % (x, endt - startt, len(hash12[x])))
        sumtime += (endt - startt)
    print("Computing chromosome %s took %s" % (chrNum, sumtime))
    return plothist(p, len(p), matrixname, chrNum)
def __init__(self, dofs):
    self.dofs = dofs
    if self.dofs is not None:
        if self.dofs == 1:
            self.bounds = np.array([1e-15, np.inf])
        else:
            self.bounds = np.array([0.0, np.inf])
        if self.dofs >= 1:
            mean, var, skew, kurt = chi.stats(dofs, moments='mvsk')
            self.mean = mean
            self.variance = var
            self.skewness = skew
            self.kurtosis = kurt
            self.x_range_for_pdf = np.linspace(0.0, 10.0 * self.mean,
                                               RECURRENCE_PDF_SAMPLES)
            self.parent = chi(self.dofs)
import numpy as np
from scipy.stats import chi


def threshold_for_N(max_error, samples, desired_std):
    # TODO: the thresholds for X and Y should be separate; otherwise we
    # take the mean of two stds and may mix a small and a large std.
    # This is the threshold on the sampled std that keeps sigma below
    # desired_std with the given max error. The actual error may be less
    # than max_error: if we set a threshold we never reach, we are never
    # wrong. Even if we set the initial det to be small, after multiplying
    # by A it might be large enough to reach the threshold.
    threshold = chi(samples).ppf(max_error) * desired_std / np.sqrt(samples)
    print('for max error of %g, with desired std of %g and %d samples, '
          'threshold is %g' % (max_error, desired_std, samples, threshold))
    return threshold
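# A quick Monte Carlo sanity check of the relation the threshold relies
# on (assumed model, not from the original source: the sampled std is the
# RMS of `samples` i.i.d. N(0, desired_std**2) draws, so
# sqrt(samples) * s / desired_std follows a chi distribution with
# `samples` degrees of freedom):
rng = np.random.default_rng(0)
samples, desired_std, max_error = 100, 0.05, 0.01
thr = threshold_for_N(max_error, samples, desired_std)
draws = rng.normal(0.0, desired_std, size=(50_000, samples))
s = np.sqrt((draws ** 2).mean(axis=1))
print((s < thr).mean())  # should be close to max_error, i.e. ~0.01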
def chiScoreCandidate(self, a, antFreq, conFreq):
    # Build the 2x2 contingency table for the candidate rule and return
    # the (chi-square statistic, p-value) pair; `chi` is assumed to alias
    # scipy.stats.chi2_contingency here.
    if a < 0.00001:
        return 0, 5
    b = antFreq - a
    c = conFreq - a
    d = self.N - antFreq - c
    return chi(np.array([[a, b], [c, d]]))[0:2]
def chi_square_error(df, loc, scale):
    # Squared errors between target summary statistics (`mean`, `lejp`,
    # and `uejp` come from the enclosing scope) and those of a candidate
    # chi distribution.
    distribution = chi(df=df, loc=loc, scale=scale)
    square_errors = [
        np.power(mean - distribution.mean(), 2.0),
        np.power(lejp - distribution.ppf(0.9), 2.0),
        np.power(uejp - distribution.ppf(0.975), 2.0)
    ]
    return square_errors
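# Illustrative evaluation of the error terms above (the targets below are
# assumed stand-ins; in the original, `mean`, `lejp`, and `uejp` come
# from the enclosing scope):
mean, lejp, uejp = 5.0, 7.0, 9.0
print(chi_square_error(df=30, loc=0.0, scale=1.0))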
def summarize(self):
    """Summarize data."""
    counts = self.countsym(self.root)  # per-symbol counts from the tree root
    obs = counts[0][0] + counts[1][0]
    exp = max(counts[0][0], counts[1][0]) * 2
    print("Number of observed species: ", obs)
    print("Number of extinct species: ", exp - obs)
    print("% extinct: ", (exp - obs) / exp * 100)
    print("chi: ", chi(f_obs=obs, f_exp=exp, ddof=-(obs - 1)))
import numpy as np
from scipy import stats
from scipy.optimize import Bounds, minimize


def stark_intervals(y, K, alpha, h, options_dict={'maxiter': 500},
                    method='slsqp'):
    """Stark's chi-squared intervals.

    NOTE: the data and the K matrix are assumed to be Cholesky
    transformed.

    Parameters:
        y (np arr)          : m-element array of Cholesky-transformed
                              observations
        K (np arr)          : m x n smearing matrix
        alpha (float)       : interval level
        h (np arr)          : n-element functional on the parameters
        options_dict (dict) : optimizer options
        method (str)        : optimizer method for scipy.optimize

    Returns:
        tuple -- lower/upper bound
    """
    # dimensions of the problem
    m, n = K.shape

    # find the chi critical value
    chisq_q = stats.chi(df=m).ppf(1 - alpha)

    # define the constraint ||y - Kx|| <= chisq_q
    constr_stark = [{
        'type': 'ineq',
        'fun': lambda x: chisq_q - np.linalg.norm(y - K @ x)
    }]

    # find the bounds for full rank
    stark_lb = minimize(
        fun=lambda x: np.dot(h, x),
        x0=np.zeros(n),
        constraints=constr_stark,
        bounds=Bounds(lb=np.zeros(n), ub=np.ones(n) * np.inf),
        method=method,
        options=options_dict
    )
    stark_ub = minimize(
        fun=lambda x: -np.dot(h, x),
        x0=np.zeros(n),
        constraints=constr_stark,
        bounds=Bounds(lb=np.zeros(n), ub=np.ones(n) * np.inf),
        method=method,
        options=options_dict
    )

    assert stark_lb['success']
    assert stark_ub['success']

    return stark_lb['fun'], -stark_ub['fun']
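# A small end-to-end sketch (all data assumed, not from the original
# source): an identity smearing matrix, noiseless observations, and a
# functional that sums the parameters, so the interval should bracket
# h @ x_true = 10.
K = np.eye(4)
x_true = np.array([1.0, 2.0, 3.0, 4.0])
lb, ub = stark_intervals(y=K @ x_true, K=K, alpha=0.05, h=np.ones(4))
print(lb, ub)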
def chi2fun(flux1, flux2, lamb, lim1, lim2):
    '''
    Method for selecting and fitting a region with chi-squared.

    Parameters
    ----------
    flux1 : numpy.ndarray
        array of fluxes for the interpolated observed spectrum
    flux2 : numpy.ndarray
        array for the synthetic spectrum
    lamb : numpy.ndarray
        array of wavelengths over which to run the chi2 method
    lim1 : float
        lower limit for the chi2 method
    lim2 : float
        upper limit for the chi2 method

    Returns
    -------
    O_chi : numpy.ndarray
        value of chi2 and p-value

    Comments: it is possible to look at each region of each chi2
    analysis for each abundance run by passing the word ``plot`` on the
    command line: ``python chi2.py plot``.
    '''
    # Select wavelengths for the chi-squared adjustment
    aux = lim(lamb, lim1, lim2)
    # Adjust and renormalize the spectra
    O_chi = chi(norm(flux1[aux]), norm(flux2[1][aux]), ddof=9, axis=0)
    # Plot each region for each abundance
    if 'plot' in sys.argv:
        plt.scatter(aux, norm(flux1[aux]), marker='o')
        plt.scatter(aux, norm(flux2[1][aux]), marker='^', color='g')
        plt.legend(("Observed", "Synthetic"), loc="lower left")
        plt.plot([min(aux), max(aux)], [1, 1], '--k')
        plt.show()
    # Return the chi2 coefficient for the region
    return O_chi[0]
def __init__(self, dofs):
    if dofs is None:
        self.dofs = 1
    else:
        self.dofs = dofs
    if self.dofs < 0:
        raise ValueError('Invalid parameter in chi distribution: '
                         'dofs must be positive.')
    if self.dofs == 1:
        self.bounds = np.array([1e-15, np.inf])
    else:
        self.bounds = np.array([0.0, np.inf])
    mean, var, skew, kurt = chi.stats(self.dofs, moments='mvsk')
    self.mean = mean
    self.variance = var
    self.skewness = skew
    self.kurtosis = kurt
    self.x_range_for_pdf = np.linspace(0.0, 10.0 * self.mean,
                                       RECURRENCE_PDF_SAMPLES)
    self.parent = chi(self.dofs)
def test_chi(self):
    from scipy.stats import chi
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    df = 78
    mean, var, skew, kurt = chi.stats(df, moments='mvsk')
    x = np.linspace(chi.ppf(0.01, df), chi.ppf(0.99, df), 100)
    ax.plot(x, chi.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi pdf')
    rv = chi(df)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    vals = chi.ppf([0.001, 0.5, 0.999], df)
    np.allclose([0.001, 0.5, 0.999], chi.cdf(vals, df))
    r = chi.rvs(df, size=1000)
    ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    ax.legend(loc='best', frameon=False)
    self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
def build_gauges():
    """Creates a gauge object for each observation point's data and
    appends each to a list.

    Returns
    -------
    gauges : (list) of Gauge objects
    """
    gauges = list()

    # Pulu Ai
    name = 'Pulu Ai'
    dists = dict()
    dists['height'] = stats.norm(loc=3, scale=0.8)
    gauge = Gauge(name, dists)
    gauge.lat = [-4.5175]
    gauge.lon = [129.775]
    gauges.append(gauge)

    # Ambon
    name = 'Ambon'
    dists = dict()
    dists['height'] = stats.norm(loc=1.8, scale=0.4)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.691]
    gauge.lon = [128.178]
    gauges.append(gauge)

    # Banda Neira
    name = 'Banda Neira'
    dists = dict()
    dists['arrival'] = stats.skewnorm(a=2, loc=15, scale=5)
    dists['height'] = stats.norm(loc=6.5, scale=1.5)
    dists['inundation'] = stats.norm(loc=185, scale=65)
    gauge = Gauge(name, dists)
    gauge.lat = [-4.5248]
    gauge.lon = [129.8965]
    gauge.beta = 4.253277987952933
    gauge.n = 0.06
    gauges.append(gauge)

    # Buru
    name = 'Buru'
    dists = dict()
    dists['height'] = stats.chi(df=1.01, loc=0.5, scale=1.5)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.3815]
    gauge.lon = [127.113]
    gauges.append(gauge)

    # Hulaliu
    name = 'Hulaliu'
    dists = dict()
    dists['height'] = stats.chi(df=1.01, loc=0.5, scale=2.0)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.543]
    gauge.lon = [128.557]
    gauges.append(gauge)

    # Saparua
    name = 'Saparua'
    dists = dict()
    dists['arrival'] = stats.norm(loc=45, scale=5)
    dists['height'] = stats.norm(loc=5, scale=1)
    dists['inundation'] = stats.norm(loc=125, scale=40)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.576]
    gauge.lon = [128.657]
    gauge.beta = 1.1067189507222546
    gauge.n = 0.06
    gauges.append(gauge)

    # Kulur
    name = 'Kulur'
    dists = dict()
    dists['height'] = stats.norm(loc=3, scale=1)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.501]
    gauge.lon = [128.562]
    gauges.append(gauge)

    # Ameth
    name = 'Ameth'
    dists = dict()
    dists['height'] = stats.norm(loc=3, scale=1)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.6455]
    gauge.lon = [128.807]
    gauges.append(gauge)

    # Amahai
    name = 'Amahai'
    dists = dict()
    dists['height'] = stats.norm(loc=3.5, scale=1)
    gauge = Gauge(name, dists)
    gauge.lat = [-3.338]
    gauge.lon = [128.921]
    gauges.append(gauge)

    return gauges
def test_Rayleigh_to_Chi(self):
    X = RV(Rayleigh())
    sims = X.sim(Nsim)
    cdf = stats.chi(df=2).cdf
    pval = stats.kstest(sims, cdf).pvalue
    self.assertTrue(pval > .01)
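# The identity this test relies on: a chi distribution with 2 degrees of
# freedom is exactly the standard Rayleigh distribution. A quick
# standalone check with scipy alone, independent of the RV/Rayleigh
# wrappers above:
import numpy as np
from scipy import stats
grid = np.linspace(0.1, 5.0, 50)
assert np.allclose(stats.chi(df=2).cdf(grid), stats.rayleigh().cdf(grid))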
    33062195.426077582)
income_model_dict['exponweib'] = st.exponweib(
    -3.5157658448986489, 0.44492833350419714,
    -15427.454196748848, 2440.0278856175246)

drivingdistance_model_dict = ct.OrderedDict()
drivingdistance_model_dict['nakagami'] = st.nakagami(
    0.11928581143831021, 14.999999999999996, 41.404620910360876)
drivingdistance_model_dict['ncx2'] = st.ncx2(
    0.30254190304723211, 1.1286538320791935,
    14.999999999999998, 8.7361471573932192)
drivingdistance_model_dict['chi'] = st.chi(
    0.47882729877571095, 14.999999999999996, 44.218301183844645)
drivingdistance_model_dict['recipinvgauss'] = st.recipinvgauss(
    2447246.0546641815, 14.999999999994969, 31.072009722580802)
drivingdistance_model_dict['f'] = st.f(
    0.85798489720127036, 4.1904554804436929,
    14.99998319939356, 21.366492843433996)

drivingduration_model_dict = ct.OrderedDict()
drivingduration_model_dict['betaprime'] = st.betaprime(
    2.576282082814398, 9.7247974165209996,
    9.1193851632305201, 261.3457987967214)
drivingduration_model_dict['exponweib'] = st.exponweib(
    2.6443841639764942, 0.89242254172118096,
    10.603640861374947, 40.28556311444698)
y2 = rv2.cdf(x)
y3 = rv3.cdf(x)

# plot the cdf
plt.clf()
plt.plot(x, y1, lw=3, label='scale=5')
plt.plot(x, y2, lw=3, label='scale=3')
plt.plot(x, y3, lw=3, label='scale=7')
plt.xlabel('X', fontsize=20)
plt.ylabel('CDF', fontsize=15)
plt.legend()
plt.savefig('/home/tomer/articles/python/tex/images/norm_cdf.png')

# generate instances of cauchy, chi, exponential, uniform
rv1 = st.cauchy(loc=0, scale=5)
rv2 = st.chi(2, loc=0, scale=8)
rv3 = st.expon(loc=0, scale=7)
rv4 = st.uniform(loc=0, scale=20)

# estimate the pdf at some points
y1 = rv1.pdf(x)
y2 = rv2.pdf(x)
y3 = rv3.pdf(x)
y4 = rv4.pdf(x)

# plot the pdf
plt.clf()
plt.plot(x, y1, lw=3, label='Cauchy')
plt.plot(x, y2, lw=3, label='Chi')
plt.plot(x, y3, lw=3, label='Exponential')
plt.plot(x, y4, lw=3, label='Uniform')
df = 78
mean, var, skew, kurt = chi.stats(df, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(chi.ppf(0.01, df), chi.ppf(0.99, df), 100)
ax.plot(x, chi.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = chi(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = chi.ppf([0.001, 0.5, 0.999], df)
np.allclose([0.001, 0.5, 0.999], chi.cdf(vals, df))
# True

# Generate random numbers:

r = chi.rvs(df, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
import distcan as dc
import numpy as np
from numpy.testing import assert_allclose
import scipy as sp
import scipy.stats as st

# Get some random places to check the cdf
np.random.seed(1234)
x = np.random.rand(10)

# Create chi distributions
chi_dc = dc.univariate.Chi(5)
chi_sp = st.chi(5)

# Check chi cdfs against each other
chidc_cdf = chi_dc.cdf(x)
chisp_cdf = chi_sp.cdf(x)
assert_allclose(chidc_cdf, chisp_cdf)

# Create chi2 distributions
chi2_dc = dc.univariate.Chisq(5)
chi2_sp = st.chi2(5)

# Check chi2 cdfs against each other
chi2dc_cdf = chi2_dc.cdf(x)
chi2sp_cdf = chi2_sp.cdf(x)
assert_allclose(chi2dc_cdf, chi2sp_cdf)

# Create Uniform
un_dc = dc.univariate.Uniform(0., 7.5)
def __init__(self, k):
    self.k = k
    # set dist before calling super's __init__
    self.dist = st.chi(df=k)
    super(Chi, self).__init__()
def __init__(self, name, longitude, latitude, distance, kind,
             arrival_params, height_params, inundation_params, beta, n,
             city_name):
    self.name = name
    self.city_name = city_name
    self.longitude = longitude
    self.latitude = latitude
    self.distance = distance
    self.kind = kind
    self.arrival_params = arrival_params
    self.height_params = height_params
    self.beta = beta
    self.n = n
    self.inundation_params = inundation_params
    if name is not None:  # allows for a None-initialized object
        # kind[0] is for wave arrival times,
        # kind[1] is for wave height,
        # kind[2] is for inundation.
        if kind[0] == 'norm':
            mean, std = arrival_params[:2]
            self.arrival_dist = stats.norm(mean, std)
        elif kind[0] == 'chi2':
            k, loc, scale = arrival_params[:3]
            self.arrival_dist = stats.chi2(k, loc=loc, scale=scale)
        elif kind[0] == 'chi':
            k, loc, scale = arrival_params[:3]
            self.arrival_dist = stats.chi(k, loc=loc, scale=scale)
        elif kind[0] == 'skewnorm':
            skew_param, mean, std = arrival_params[:3]
            self.arrival_dist = stats.skewnorm(skew_param, mean, std)

        if kind[1] == 'norm':
            mean, std = height_params[:2]
            self.height_dist = stats.norm(mean, std)
        elif kind[1] == 'chi2':
            k, loc, scale = height_params[:3]
            self.height_dist = stats.chi2(k, loc=loc, scale=scale)
        elif kind[1] == 'chi':
            k, loc, scale = height_params[:3]
            self.height_dist = stats.chi(k, loc=loc, scale=scale)
        elif kind[1] == 'skewnorm':
            skew_param, mean, std = height_params[:3]
            self.height_dist = stats.skewnorm(skew_param, mean, std)

        if kind[2] == 'norm':
            mean, std = inundation_params[:2]
            self.inundation_dist = stats.norm(mean, std)
        elif kind[2] == 'chi2':
            k, loc, scale = inundation_params[:3]
            self.inundation_dist = stats.chi2(k, loc=loc, scale=scale)
        elif kind[2] == 'chi':
            k, loc, scale = inundation_params[:3]
            self.inundation_dist = stats.chi(k, loc=loc, scale=scale)
        elif kind[2] == 'skewnorm':
            skew_param, mean, std = inundation_params[:3]
            self.inundation_dist = stats.skewnorm(skew_param, mean, std)
def build_gauges():
    gauges = list()

    # Pulu Ai
    name = 'Pulu Ai'
    dists = dict()
    dists['height'] = stats.norm(loc=3, scale=0.2)
    gauge = Gauge(name, dists)
    gauge.lat = -4.5166
    gauge.lon = 129.775
    gauges.append(gauge)

    # Ambon
    name = 'Ambon'
    dists = dict()
    dists['height'] = stats.norm(loc=1.8, scale=0.1)
    gauge = Gauge(name, dists)
    gauge.lat = -3.691
    gauge.lon = 128.178
    gauges.append(gauge)

    # Banda Neira
    name = 'Banda Neira'
    dists = dict()
    dists['arrival'] = stats.skewnorm(a=2, loc=15, scale=5)
    dists['height'] = stats.norm(loc=6.5, scale=1)
    dists['inundation'] = stats.skewnorm(a=3, loc=231, scale=85)
    gauge = Gauge(name, dists)
    gauge.lat = -4.5248
    gauge.lon = 129.896
    gauge.beta = 4.253277987952933
    gauge.n = 0.03
    gauges.append(gauge)

    # Buru
    name = 'Buru'
    dists = dict()
    dists['height'] = stats.chi(df=1.01, loc=1.0, scale=1.0)
    gauge = Gauge(name, dists)
    gauge.lat = -3.3815
    gauge.lon = 127.115
    gauges.append(gauge)

    # Saparua
    name = 'Saparua'
    dists = dict()
    dists['arrival'] = stats.norm(loc=45, scale=5)
    dists['height'] = stats.norm(loc=5, scale=.75)
    dists['inundation'] = stats.norm(loc=120, scale=10)
    gauge = Gauge(name, dists)
    gauge.lat = -3.576
    gauge.lon = 128.657
    gauge.beta = 1.1067189507222546
    gauge.n = 0.03
    gauges.append(gauge)

    # Kulur
    name = 'Kulur'
    dists = dict()
    dists['height'] = stats.norm(loc=2.5, scale=0.7)
    gauge = Gauge(name, dists)
    gauge.lat = -3.501
    gauge.lon = 128.562
    gauges.append(gauge)

    # Ameth
    name = 'Ameth'
    dists = dict()
    dists['height'] = stats.norm(loc=3, scale=1)
    gauge = Gauge(name, dists)
    gauge.lat = -3.6455
    gauge.lon = 128.807
    gauges.append(gauge)

    # Amahai
    name = 'Amahai'
    dists = dict()
    dists['height'] = stats.norm(loc=3.5, scale=1)
    gauge = Gauge(name, dists)
    gauge.lat = -3.338
    gauge.lon = 128.921
    gauges.append(gauge)

    return gauges
def all_dists():
    # dist params were taken from the scipy.stats official
    # documentation examples
    # Total - 89
    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0,
                                   scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0,
                                                 scale=1.0),
        "gilbrat": stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, loc=0.0,
                                           scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, loc=0.0,
                                       scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99, loc=0.0,
                                             scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
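# Possible usage sketch for the table above: iterate over the frozen
# distributions and print each one's support.
for dist_name, frozen in all_dists().items():
    print(dist_name, frozen.support())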
import pandas as pd
import numpy as np
from scipy.stats import chi, chi2
import plotly as py
import cufflinks

k = 20
res = pd.DataFrame()
for std in [1, 1.5, 2, 2.5, 3]:
    df = pd.DataFrame(index=np.linspace(0, 28, 1000))
    df['chi_%03d_std_%g' % (k, std)] = chi(k).pdf(df.index.values)
    # now we fix the x axis:
    # k-1 for sampled std with the bias fixed, and k for sampled std as is
    df.index = std * df.index.values / np.sqrt(k)
    res = pd.concat([res, df[df > 1e-3].dropna()], axis=0)

fig = res.iplot(asFigure=True)
py.offline.plot(fig)
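# The rescaling above uses the fact that for k i.i.d. N(0, std**2)
# samples, the uncorrected sample std is distributed as
# std * chi(k) / sqrt(k). A minimal numerical check of that scaling
# (assumed setup, independent of the plotting above):
rng = np.random.default_rng(1)
std_true = 2.0
s = np.sqrt((rng.normal(0, std_true, size=(100_000, k)) ** 2).mean(axis=1))
print(s.mean(), std_true * chi(k).mean() / np.sqrt(k))  # should agree closely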
def nat_to_scipy_distribution(self, q: ChiNP) -> Any:
    return ss.chi((q.k_over_two_minus_one + 1.0) * 2.0)
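# Round-trip sketch for the mapping above, using a hypothetical stand-in
# for ChiNP that carries the natural parameter k/2 - 1 (ss is
# scipy.stats, as in the snippet above):
from types import SimpleNamespace
q = SimpleNamespace(k_over_two_minus_one=5.0 / 2.0 - 1.0)
dist = ss.chi((q.k_over_two_minus_one + 1.0) * 2.0)
assert dist.args[0] == 5.0  # recovers k = 5 degrees of freedom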
# %% Plots
dist_gamma = gamma(a=best_params_gamma[0], loc=best_params_gamma[1],
                   scale=best_params_gamma[2])
dist_lognorm = lognorm(s=best_params_log[0], loc=best_params_log[1],
                       scale=best_params_log[2])
dist_pareto = pareto(b=best_params_pareto[0], loc=best_params_pareto[1],
                     scale=best_params_pareto[2])
dist_chi = chi(df=best_params_chi[0], loc=best_params_chi[1],
               scale=best_params_chi[2])

lognorm_mu = np.log(best_params_log[2])
lognorm_sigma = best_params_log[0]
gamma_alpha = best_params_gamma[0]
gamma_beta = 1 / best_params_gamma[2]
pareto_b = best_params_pareto[0]
chi_df = best_params_chi[0]

x = np.linspace(0, 1, num=500)
plt.plot(x, dist_gamma.ppf(x), color='red', label='gamma')
def _scipy(self, loc=0.0, scale=1.0):
    return ss.chi(df=self.df, loc=loc, scale=scale)
best_dist = getattr(st, best_fit_name)
print(best_fit_name, best_fit_params, best_dist)

# Make PDF with best params
cdf = make_pdf(best_dist, best_fit_params)

# Display
plt.figure(figsize=(12, 8))
# ax = cdf.plot(lw=2, label='PDF', legend=True)
# data.plot(kind='hist', bins=50, normed=True, alpha=0.5, label='Data',
#           legend=True, ax=ax)
x = np.linspace(0, 25, 100)
plt.plot(days, cdf_die)
plt.plot(days, best_dist.cdf(days, *best_fit_params))

# Fit a chi CDF (shape mu, scale sigma) to the empirical CDF
f = lambda x, mu, sigma: st.chi(mu, loc=0, scale=sigma).cdf(x)
mu, sigma = curve_fit(f, days, cdf_die)[0]
plt.plot(x, st.chi(mu, loc=0, scale=sigma).cdf(x), 'k')

# param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']
# param_str = ', '.join(['{}={:0.2f}'.format(k, v) for k, v in zip(param_names, best_fit_params)])
# dist_str = '{}({})'.format(best_fit_name, param_str)
# ax.set_title(u'El Niño sea temp. with best fit distribution \n' + dist_str)
# ax.set_xlabel(u'Temp. (°C)')
# ax.set_ylabel('Frequency')