def trunclognormprior_pdf(data, mu, sigma):
    epsilon = 1e-200
    # Lognormal pdf truncated to (0, 1); lognorm.cdf(0.0, ...) is 0, so the
    # denominator is simply the lognormal mass below 1.
    term2 = (lognorm.pdf(data, sigma, scale=mu, loc=0.0)
             / (lognorm.cdf(1.0, sigma, scale=mu, loc=0.0)
                - lognorm.cdf(0.0, sigma, scale=mu, loc=0.0))) * (data < 1.0)
    return term2 + epsilon
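# A minimal cross-check (not from the original code) of the SciPy lognormal
# parameterization these snippets rely on: the shape argument `s` is the
# sigma of the underlying normal and `scale` is exp(mu), so the CDF agrees
# with norm.cdf applied to the standardized log of the argument.
import numpy as np
from scipy.stats import lognorm, norm

mu_chk, sigma_chk, x_chk = 0.3, 1.2, 2.5   # hypothetical values
assert np.isclose(lognorm.cdf(x_chk, s=sigma_chk, scale=np.exp(mu_chk)),
                  norm.cdf((np.log(x_chk) - mu_chk) / sigma_chk))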
def calc_Nd_interval_NorESM(input_ds, fromNd, toNd, varNameN):
    varN = 'NCONC%02.0f' % 1
    da_Nd = input_ds[varN] * 0.  # keep dimensions, zero value
    da_Nd.name = varNameN
    da_Nd.attrs['long_name'] = 'N$_{%.0f-%.0f}$' % (fromNd, toNd)
    varsNCONC = sized_varListNorESM['NCONC']
    varsNMR = sized_varListNorESM['NMR']
    varsSIG = sized_varListNorESM['SIGMA']
    for varN, varSIG, varNMR in zip(varsNCONC, varsSIG, varsNMR):
        NCONC = input_ds[varN].values  # *10**(-6)  m-3 --> cm-3
        SIGMA = input_ds[varSIG].values
        # the factor 2 converts radius --> diameter (applied to the values,
        # not to the list of variable names)
        NMR = input_ds[varNMR].values * 2
        if fromNd > 0:
            dummy = (NCONC * lognorm.cdf(toNd, np.log(SIGMA), scale=NMR)
                     - NCONC * lognorm.cdf(fromNd, np.log(SIGMA), scale=NMR))
        else:
            dummy = NCONC * lognorm.cdf(toNd, np.log(SIGMA), scale=NMR)
        # NMR == 0 gives NaN values; we set these to zero:
        dummy[NMR == 0] = 0.
        dummy[NCONC == 0] = 0.
        dummy[np.isnan(NCONC)] = np.nan
        da_Nd += dummy
    return da_Nd
def exotic_price2(x0, k, t, vol, b):
    s = vol * sqrt(t)
    mu = -s * s / 2
    scl = np.exp(mu)
    alph = (np.log(k / x0) - (mu + s * s)) / s
    beta = (np.log(b / x0) - (mu + s * s)) / s
    p1 = x0 * (norm.cdf(beta) - norm.cdf(alph))
    p2 = k * (lognorm.cdf(b / x0, s, scale=scl) - lognorm.cdf(k / x0, s, scale=scl))
    # print(p1, p2)
    return p1 - p2
def simHawkesOneDay(
    mu: float,
    alpha: float,
    beta: float,
    R0: np.ndarray,
    nrTrainingDays: int,
    day: int,
    cases: np.ndarray,
    config: EMConfig,
    threshold: float = 1e-5,
) -> np.ndarray:
    assert (cases.shape[0] >= nrTrainingDays
            ), "The number of cases does not match the number of training days"
    timestamps = nrTrainingDays + day - np.array(range(nrTrainingDays + day))
    if config.incubationDistribution == "weibull":
        intensity = weibull_min.cdf(
            timestamps + 0.5, c=2.453, scale=6.258) - weibull_min.cdf(
                timestamps - 0.5, c=2.453, scale=6.258)
        intensity[len(intensity) - 1] += weibull_min.cdf(0.5, c=2.453, scale=6.258)
    elif config.incubationDistribution == "gamma":
        intensity = gamma.cdf(timestamps + 0.5, a=5.807, scale=0.948) - gamma.cdf(
            timestamps - 0.5, a=5.807, scale=0.948)
        intensity[len(intensity) - 1] += gamma.cdf(0.5, a=5.807, scale=0.948)
    elif config.incubationDistribution == "lognormal":
        # use a distinct name so the baseline parameter `mu` is not shadowed
        sigma = 0.5
        mu_incubation = 1.63
        intensity = lognorm.cdf(
            timestamps + 0.5, s=sigma, scale=np.exp(mu_incubation)) - lognorm.cdf(
                timestamps - 0.5, s=sigma, scale=np.exp(mu_incubation))
        intensity[len(intensity) - 1] += lognorm.cdf(
            0.5, s=sigma, scale=np.exp(mu_incubation))
    elif config.incubationDistribution == "normal":
        intensity = norm.cdf(timestamps + 0.5, scale=alpha, loc=beta) - norm.cdf(
            timestamps - 0.5, scale=alpha, loc=beta)
        intensity[len(intensity) - 1] += norm.cdf(0.5, scale=alpha, loc=beta)
    else:
        raise NotImplementedError
    intensity = intensity[intensity > threshold].reshape(-1, 1)
    kernelRange = list(
        range(nrTrainingDays + day - intensity.shape[0], nrTrainingDays + day))
    intensityDay = intensity * np.array(
        R0[kernelRange].T * cases[kernelRange]).reshape(-1, 1)
    intensityDay = np.round(np.sum(intensityDay) + mu)
    # TODO: why a Poisson draw here instead of just taking the expectation?
    # Perhaps for a confidence interval.
    nrTriggeredCases = np.random.poisson(intensityDay)
    nrTriggeredCases = min(nrTriggeredCases, swissPopulation)
    return nrTriggeredCases
def kinetic_dispersion(self):
    # print(self.nd_param.k0_shape, self.nd_param.k0_loc, self.nd_param.k0_scale)
    k0_weights = np.zeros(self.simulation_options["dispersion_bins"])
    k_start = lognorm.ppf(0.0001, self.nd_param.k0_shape,
                          loc=self.nd_param.k0_loc, scale=self.nd_param.k0_scale)
    k_end = lognorm.ppf(0.9999, self.nd_param.k0_shape,
                        loc=self.nd_param.k0_loc, scale=self.nd_param.k0_scale)
    k0_vals = np.linspace(k_start, k_end, self.simulation_options["dispersion_bins"])
    k0_weights[0] = lognorm.cdf(k0_vals[0], self.nd_param.k0_shape,
                                loc=self.nd_param.k0_loc, scale=self.nd_param.k0_scale)
    for k in range(1, len(k0_weights)):
        k0_weights[k] = (lognorm.cdf(k0_vals[k], self.nd_param.k0_shape,
                                     loc=self.nd_param.k0_loc,
                                     scale=self.nd_param.k0_scale)
                         - lognorm.cdf(k0_vals[k - 1], self.nd_param.k0_shape,
                                       loc=self.nd_param.k0_loc,
                                       scale=self.nd_param.k0_scale))
    # plt.plot(k0_vals, k0_weights)
    # plt.title("k0")
    # plt.show()
    return k0_vals, k0_weights
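# A standalone sketch (assumed shape/scale values, not from the class above)
# of the same CDF-difference binning used in kinetic_dispersion: each weight
# is the lognormal mass up to a grid point, so the weights sum to the CDF at
# the last grid point (~0.9999 for the 0.01%-99.99% quantile span).
import numpy as np
from scipy.stats import lognorm

shape_d, scale_d, nbins_d = 0.4, 100.0, 16
grid_d = np.linspace(lognorm.ppf(1e-4, shape_d, scale=scale_d),
                     lognorm.ppf(1 - 1e-4, shape_d, scale=scale_d), nbins_d)
weights_d = np.diff(lognorm.cdf(grid_d, shape_d, scale=scale_d), prepend=0.0)
assert np.isclose(weights_d.sum(), lognorm.cdf(grid_d[-1], shape_d, scale=scale_d))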
def precomputeKernelPDF(alpha: float, beta: float, nrTrainingDays: int,
                        config: EMConfig) -> np.ndarray:
    kernelPDF = np.zeros((nrTrainingDays, nrTrainingDays))
    if config.incubationDistribution == "weibull":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = weibull_min.cdf(
                        i - j + 0.5, c=alpha, scale=beta) - weibull_min.cdf(
                            i - j - 1, c=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = weibull_min.cdf(
                        i - j + 0.5, c=alpha, scale=beta) - weibull_min.cdf(
                            i - j - 0.5, c=alpha, scale=beta)
    elif config.incubationDistribution == "gamma":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = gamma.cdf(
                        i - j + 0.5, a=alpha, scale=beta) - gamma.cdf(
                            i - j - 1, a=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = gamma.cdf(
                        i - j + 0.5, a=alpha, scale=beta) - gamma.cdf(
                            i - j - 0.5, a=alpha, scale=beta)
    elif config.incubationDistribution == "lognormal":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = lognorm.cdf(
                        i - j + 0.5, s=alpha, scale=beta) - lognorm.cdf(
                            i - j - 1, s=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = lognorm.cdf(
                        i - j + 0.5, s=alpha, scale=beta) - lognorm.cdf(
                            i - j - 0.5, s=alpha, scale=beta)
    elif config.incubationDistribution == "normal":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = norm.cdf(
                        i - j + 0.5, scale=alpha, loc=beta) - norm.cdf(
                            i - j - 1, scale=alpha, loc=beta)
                else:
                    kernelPDF[i, j] = norm.cdf(
                        i - j + 0.5, scale=alpha, loc=beta) - norm.cdf(
                            i - j - 0.5, scale=alpha, loc=beta)
    else:
        raise NotImplementedError
    return kernelPDF
def expected_bands(mean, sd, size_bands):
    sizes = {}
    for size_band in size_bands:
        if '-' in size_band:
            upper = int(size_band.split('-')[1]) + 1
            lower = int(size_band.split('-')[0])
        else:
            upper = np.inf
            lower = int(size_band[:-1])
        sizes[size_band] = lognorm.cdf(upper, s=sd, scale=np.exp(mean)) - lognorm.cdf(
            lower, s=sd, scale=np.exp(mean))
    return sizes
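# A brief usage sketch for expected_bands above (band labels are assumptions
# mirroring the '250+' convention used elsewhere in this file): the returned
# dict holds the lognormal probability mass in each size band.
bands = expected_bands(mean=0.5, sd=1.2, size_bands=['0-4', '5-9', '10-19', '250+'])
# e.g. bands['0-4'] is P(X < 5) for X ~ LogNormal(mu=0.5, sigma=1.2)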
def overtopfailure(overmu, oversigma, overheight):
    Htop = Htoe + overheight
    OVERFLOW = flow(Htop)  # Overtopping Flow
    overexpmu = math.exp(overmu)
    # lognormal with sigma = oversigma and median exp(overmu); loc stays 0
    # (passing loc=overmu would shift the support and is almost certainly
    # unintended when the scale is already exp(overmu))
    FNOVER = lognorm.cdf(OVERFLOW, oversigma, scale=overexpmu)
    FOVER = 1 - FNOVER
    return FOVER
def get_profit_probability(self, x_vector, y_vector, iv, s, r, t):
    '''
    Returns the probability of obtaining a profit with the strategy
    in the current scenario under study.
    Inputs:
        x_vector -> vector of underlying prices
        y_vector -> vector with Black-Scholes results
        iv -> underlying implied volatility
        s -> current underlying price
        r -> risk-free rate
        t -> time to expiration
    '''
    p_profit = 0
    # Calculate break-even points
    zero_crossings = np.where(np.diff(np.sign(y_vector)))[0]
    breakevens = [x_vector[i] for i in zero_crossings]
    if len(breakevens) > 2:
        print('ERROR: more than 2 zeroes detected')
    elif len(breakevens) == 0:
        p_profit = 0.9999 if y_vector[len(y_vector) // 2] > 0 else 0.0001
    else:
        # Get probability of being below the min breakeven at expiration
        # REVIEW: CDF can't return zero!
        scale = s * np.exp(r * t)
        p_below = lognorm.cdf(breakevens[0], iv, scale=scale)
        # Get probability of being above the max breakeven at expiration
        p_above = lognorm.sf(breakevens[1], iv, scale=scale)
        # Get the probability of profit for the calendar
        p_profit = 1 - p_above - p_below
        print('Profit prob. with s=' + str(s) + ', iv=' + str(iv) +
              ', b/e=' + str(breakevens))
        print('1 - ' + str(p_below) + ' - ' + str(p_above) + ' = ' +
              str(p_profit))  # TODO debugging purposes
    return p_profit
def main(mean=0.5, sd=1.2):
    for x in np.linspace(1, 100000, num=16):
        max_sizes = [0.00001, 5, 10, 20, 50, 100, 250, 10**10]
        titles = ['0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+']
        binned_sample_exp = {
            titles[i]: lognorm.cdf(max_sizes[i + 1], sd, scale=np.exp(mean))
            - lognorm.cdf(max_sizes[i], sd, scale=np.exp(mean))
            for i in range(len(max_sizes) - 1)
        }
        binned_sample_gen = analysis.sort_sample(
            lognorm.rvs(sd, scale=np.exp(mean), size=int(x)))
        binned_sample_gen = {s: v / int(x) for s, v in binned_sample_gen.items()}
        print(binned_sample_gen, binned_sample_exp)
    with Pool() as p:
        data = p.starmap(
            simulation_one_parameter_set.parameter_expectation,
            [(int(x), mean, sd) for x in np.linspace(0, 100000, num=16)])
    mean_with = []
    sd_with = []
    mean_without = []
    sd_without = []
    for d in data:
        mean_with.append(d[0] - mean)
        sd_with.append(d[1] - sd)
        mean_without.append(d[2])
        sd_without.append(d[3])
    plt.plot(np.linspace(0, 100000, num=16), mean_with)
    plt.plot(np.linspace(0, 100000, num=16), sd_with)
    plt.show()
def distfit(n, dists, title, width, height, fwhm, dm, samples=1000):
    from scipy.stats import lognorm
    bins_h = int(height * 60. / 8.)
    bins_w = int(width * 60. / 8.)
    sig = ((bins_w / width) * fwhm) / 2.355
    valsLP = []
    for i in range(samples):
        random_ra = width * np.random.random_sample((n,))
        random_dec = height * np.random.random_sample((n,))
        grid_r, xedges_r, yedges_r = np.histogram2d(
            random_dec, random_ra, bins=[bins_h, bins_w],
            range=[[0, height], [0, width]])
        grid_gaus_r = ndimage.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r - grid_mean_r) / grid_sigma_r
        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),
                                              grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])
    x = np.linspace(2, 22, 4000)
    bins, edges = np.histogram(valsLP, bins=400, range=[2, 22], density=True)
    centers = (edges[:-1] + edges[1:]) / 2.
    al, loc, beta = lognorm.fit(valsLP)
    pct = 100.0 * lognorm.cdf(dists, al, loc=loc, scale=beta)
    print('Significance of detection:', '{0:6.3f}%'.format(pct))
def getMu(xCCDF, yCCDF, sizeEvent, PDF):
    from scipy.stats import lognorm
    from scipy.special import erf
    bins = np.sort(sizeEvent)
    best_chi = 50000
    N = len(sizeEvent)
    n = len(xCCDF)
    # MLE estimators
    mu = 1. / N * np.sum(np.log(bins))
    sigma = np.sqrt(1. / N * np.sum((np.log(bins) - mu)**2))
    scale = np.exp(mu)
    shape = sigma
    print([shape, scale])
    bestb = 1
    bestc = 1
    bestd = 1
    loc = 0
    # [add, loc, bdd] = lognorm.fit(bins)
    d = 1
    Theoretical_CDF = lognorm.cdf(xCCDF, bestb * shape, bestd * loc, bestc * scale)
    Theoretical_CCDF = 1 - Theoretical_CDF
    print(bestb, bestc, bestd)
    return [(bestc * scale), bestb * shape, bestd * loc, 1 - best_chi,
            Theoretical_CCDF]
def damage(wind_speed, mu, sigma, scale=1.0):
    """
    Calculate the damage level based on a given array of wind speed
    values and given mu and sigma values. The mu and sigma control the
    form of the vulnerability function and are specific to each building
    class. This version uses the log-normal cumulative probability
    function to describe the damage level.

    Wind speed values are stored in metres/second, but the vulnerability
    relations are based on km/h, so we convert on the fly here.

    The 'scale' value is used to reduce the total damage for a building
    type to provide an upper limit to damage (e.g. assumed only damage
    is to windows/cladding).
    """
    # mu is the scale parameter and sigma the shape parameter
    # of the log-normal distribution:
    dmg = scale * lognorm.cdf(wind_speed * 3.6, sigma, scale=mu)
    # Mask 'small' damage values to be zero:
    np.putmask(dmg, dmg < EPSILON, 0.0)
    np.putmask(dmg, mu == 0.0, 0.0)
    return dmg
def lognorm_test2():
    price = 10
    rand_vars = norm(loc=1, scale=0.1).rvs(size=100)
    rand_prices = [price]
    for i in range(len(rand_vars)):
        rand_prices.append(rand_prices[i] * rand_vars[i])
    # rand_prices should now be approximately lognormally distributed:
    # each step multiplies the price by a N(1, 0.1) factor, starting at 10
    rand_prices = np.asarray(rand_prices)
    print(rand_vars)
    print(rand_prices)
    fig, ax = plt.subplots()
    ax.hist(rand_prices)
    log_rand_prices = np.log(rand_prices)
    rand_prices_std = rand_prices.std()
    rand_prices_mean = rand_prices.mean()
    log_rand_prices_std = log_rand_prices.std()
    log_rand_prices_mean = log_rand_prices.mean()
    x = np.linspace(0, 100, 1000)
    # use norm mu & sigma
    lognorm_norm_cdf = lognorm.cdf(x, 0.1, 1)
    # use rand prices std & mean
    lognorm_rand_cdf = lognorm.cdf(x, rand_prices_std, rand_prices_mean)
    # use log-price std & mean
    lognorm_log_cdf = lognorm.cdf(x, log_rand_prices_std, log_rand_prices_mean)
    # use norm but with an exp transform?
    norm_trans_cdf = norm.cdf((np.exp(x) - 1) / 0.1)
    # ax.plot(x, lognorm_norm_cdf, label="Norm CDF")
    # ax.plot(x, lognorm_rand_cdf, label="Rand CDF")
    # ax.plot(x, lognorm_log_cdf, label="Log CDF")
    ax.plot(x, norm_trans_cdf, label="Norm trans CDF")
    plt.legend()
    plt.show()
def calc_risk_integral(RTGM, beta, SAs, Probs):
    from scipy.stats import norm, lognorm
    from numpy import array, arange, exp, log, trapz, interp, isinf, where
    from scipy import interpolate
    from misc_tools import extrap1d

    FRAGILITY_AT_RTGM = 0.10
    BETA = 0.6
    AFE4UHGM = -log(1 - 0.02) / 50  # exceedance frequency for 1/2475 yrs
    TARGET_RISK = -log(1 - 0.01) / 50
    '''
    SAs = array([0.1613, 0.1979, 0.2336, 0.3385, 0.4577, 0.5954, 0.7418,
                 0.7905, 0.9669, 1.1697])
    Probs = array([0.02, 0.01375, 0.01, 0.00445, 0.0021, 0.001, 0.0005,
                   0.000404, 0.0002, 0.0001])
    '''
    # get uniform hazard at 1/2475
    idx = where(isinf(Probs) == False)[0]
    Probs = Probs[idx]
    SAs = SAs[idx]
    UHGM = exp(interp(log(AFE4UHGM), log(Probs[::-1]), log(SAs[::-1])))
    # up-sample hazard curve
    UPSAMPLING_FACTOR = 1.05
    SMALLEST_SA = min([min(SAs), UHGM / 20])
    LARGEST_SA = max([max(SAs), UHGM * 20])
    upSAs = exp(arange(log(SMALLEST_SA), log(LARGEST_SA), log(UPSAMPLING_FACTOR)))
    f_i = interpolate.interp1d(log(SAs), log(Probs))
    f_x = extrap1d(f_i)
    upProbs = exp(f_x(log(upSAs)))
    '''
    upSAs = SAs
    upProbs = Probs
    '''
    # get fragility curve
    FragilityCurve = {}
    FragilityCurve['Median'] = RTGM / exp(norm.ppf(FRAGILITY_AT_RTGM) * BETA)
    FragilityCurve['PDF'] = lognorm.pdf(upSAs, BETA, scale=FragilityCurve['Median'])
    FragilityCurve['CDF'] = lognorm.cdf(upSAs, BETA, scale=FragilityCurve['Median'])
    FragilityCurve['SAs'] = upSAs
    FragilityCurve['Beta'] = BETA
    # do risk integral
    Integrand = FragilityCurve['PDF'] * upProbs
    Risk = trapz(Integrand, upSAs)
    # calculate collapse probability
    CollapseProb = 1 - exp(-50 * Risk)
    RiskCoefficient = RTGM / UHGM
    return upProbs, upSAs, FragilityCurve, Integrand, CollapseProb
def simple_price2(x0, k, t, vol):
    s = vol * sqrt(t)
    mu = -s * s / 2
    scl = np.exp(mu)
    alph = (np.log(k / x0) - (mu + s * s)) / s
    p1 = x0 * (1 - norm.cdf(alph))
    p2 = k * (1 - lognorm.cdf(k / x0, s, scale=scl))
    # print(p1, p2)
    return p1 - p2
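# A hedged cross-check (not part of the original pricing code): the tail term
# used in simple_price2 and exotic_price2, 1 - lognorm.cdf(k/x0, s, scale=exp(mu)),
# is the same quantity as norm.sf((log(k/x0) - mu) / s), the usual
# Black-Scholes d2-style expression.
import numpy as np
from scipy.stats import lognorm, norm

x0_, k_, t_, vol_ = 100.0, 105.0, 0.5, 0.2   # hypothetical inputs
s_ = vol_ * np.sqrt(t_)
mu_ = -s_ * s_ / 2
assert np.isclose(1 - lognorm.cdf(k_ / x0_, s_, scale=np.exp(mu_)),
                  norm.sf((np.log(k_ / x0_) - mu_) / s_))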
def _poe_continuous(fragility_function, iml):
    variance = fragility_function.stddev ** 2.0
    sigma = math.sqrt(math.log(
        (variance / fragility_function.mean ** 2.0) + 1.0))
    mu = fragility_function.mean ** 2.0 / math.sqrt(
        variance + fragility_function.mean ** 2.0)
    return lognorm.cdf(iml, sigma, scale=mu)
def calculate_robustness_index(results, enzymesInner, nsteps):
    '''
    Parameters
    ----------
    results: dict
    enzymesInner: lst, enzyme IDs with initial and final reaction
    nsteps: int, # of integration steps

    Returns
    -------
    robustIdx: ser, median of robustness index Si for each enzyme
    '''
    from scipy.stats import lognorm

    robustIdx = pd.Series(index=enzymesInner)
    for enzyme in robustIdx.index:
        Ss = []
        for i in range(len(results[enzyme])):
            resulti = results[enzyme][i]
            # feasible LB and UB of enzyme level
            Eref = resulti[0].loc[enzyme, resulti[0].columns[0]]
            LB = resulti[0].loc[enzyme, resulti[0].columns[-1]]
            UB = resulti[1].loc[enzyme, resulti[1].columns[-1]]
            # calculate the probability of maintaining stability
            p = lognorm.cdf(UB, s=0.5, scale=Eref) - lognorm.cdf(
                LB, s=0.5, scale=Eref)  # ln(E) ~ N(ln(Eref), 0.5)
            # calculate the robustness index
            if p <= 0:
                p = 0.0001
            S = -p * np.log(p)
            # S = p
            Ss.append(S)
        robustIdx.loc[enzyme] = np.mean(Ss)
    return robustIdx
def estimate_bias(n, mean, sd, sample_size=100):
    print(n, mean, sd)
    mean_total = 0
    sd_total = 0
    fixed_mean_total = 0
    fixed_sd_total = 0
    for _ in range(sample_size):
        if n is not None:
            sample = lognorm.rvs(sd, scale=np.exp(mean), size=n)
            binned_sample = sort_sample(sample)
            binned_sample = {s: v / n for s, v in binned_sample.items()}
            # params = calculate_parameters.max_likelihood(binned_sample, sample.mean())
            params = calculate_parameters.max_likelihood(binned_sample)
        else:
            max_sizes = [0.00001, 5, 10, 20, 50, 100, 250, 10**10]
            titles = ['0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+']
            binned_sample = {
                titles[i]: lognorm.cdf(max_sizes[i + 1], sd, scale=np.exp(mean))
                - lognorm.cdf(max_sizes[i], sd, scale=np.exp(mean))
                for i in range(len(max_sizes) - 1)
            }
            params = calculate_parameters.max_likelihood(
                binned_sample, np.exp(mean + sd**2 / 2))
        # print(params)
        if params is None:
            continue
        recovered_mean, recovered_sd = params
        mean_total += recovered_mean - mean
        sd_total += recovered_sd - sd
        # fixed_mean, fixed_sd = calculate_parameters.remove_bias(recovered_mean, recovered_sd)
        # fixed_mean_total += fixed_mean
        # fixed_sd_total += fixed_sd
    return (mean_total / sample_size, sd_total / sample_size,
            fixed_mean_total / sample_size, fixed_sd_total / sample_size)
def f_detect_outlier(sr_input_values, method='triple'):
    ## Outlier detection: triple standard deviation or leave-one-out.
    ## Inputs:
    ##     sr_input_values: series of raw values
    ##     method: detection method
    ## Output:
    ##     DataFrame with raw values (input_values) and outlier flag (if_outlier)
    if method == 'triple':  # triple std (triple)
        mu = np.mean(sr_input_values)
        sigma = np.std(sr_input_values)
        sr_if_outlier = ((sr_input_values < (mu - 3 * sigma)) |
                         (sr_input_values > (mu + 3 * sigma)))
        return pd.DataFrame(
            {
                'input_values': sr_input_values,
                'if_outlier': sr_if_outlier
            },
            columns=['input_values', 'if_outlier'])
    else:  # leave one out (loo)
        len_loss = len(sr_input_values)
        arr_p_value = np.zeros(len_loss)
        for i in range(len_loss):
            mu1 = np.mean(np.log(sr_input_values.drop(sr_input_values.index[i])))
            sigma1 = np.std(np.log(sr_input_values.drop(sr_input_values.index[i])))
            # two-sided p-value from the lognormal CDF fitted without point i
            cdf_i = lognorm.cdf(sr_input_values[i], s=sigma1, scale=np.exp(mu1))
            arr_p_value[i] = 1 - cdf_i if cdf_i > 0.5 else cdf_i
        return pd.DataFrame(
            {
                'input_values': sr_input_values,
                'p_value': arr_p_value,
                'if_outlier': arr_p_value < 0.001
            },
            columns=['input_values', 'p_value', 'if_outlier'])
def test_z(filename, uncorr_algo, distbn_to_fit):
    '''test case for pdz domain proteins'''
    algn = read_free(filename)
    sca_algn = sca(algn)
    algn_shape = get_algn_shape(algn)
    no_pos = algn_shape.no_pos
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa
    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    pdb_res_list = read_pdb(PDZ_PDB_FILE, 'A')
    msa_algn = msa_search(pdb_res_list, sca_algn.alignment)
    spect = spectral_decomp(sca_algn, 100, 100)
    print('spect lbd hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect lbd_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev_rnd hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))
    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]
    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation
    n_min = min(no_seq, no_pos)
    Pi = dot(U[:, 0:n_min - 1], transpose(V[:, 0:n_min - 1]))
    U_p = dot(Pi, spect.pos_ev)
    distbn = get_distbn(distbn_to_fit)
    pd = distbn.fit(spect.pos_ev[:, 0], floc=0)  # floc=0 holds location at 0 for fitting
    print(pd)
    p_cutoff = 0.8  # cutoff for the cdf
    xhist = arange(0, 0.4, 0.01)
    x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist)) / 100)
    cdf = lognorm.cdf(x_dist, pd[0], pd[1], pd[2])  # lognorm.cdf(x, shape, loc, scale)
    jnk = min(abs(cdf - p_cutoff))
    x_dist_pos_right = np.argmin(abs(cdf - p_cutoff))
    cutoff_ev = x_dist[x_dist_pos_right]
    sector_def = np.array(np.where(spect.pos_ev[:, 0] > cutoff_ev)[0])[0]
def test_fit(self):
    p = generic.fit(self.da, 'lognorm')
    assert p.dims[0] == 'dparams'
    assert p.get_axis_num('dparams') == 0
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    np.testing.assert_array_equal(p[:, 0, 0], p0)
    # Check that we can reuse the parameters with scipy distributions
    cdf = lognorm.cdf(.99, *p.values)
    assert cdf.shape == (self.nx, self.ny)
def test_fit(self):
    p = generic.fit(self.da, "lognorm")
    assert p.dims[0] == "dparams"
    assert p.get_axis_num("dparams") == 0
    p0 = lognorm.fit(self.da.values[:, 0, 0])
    np.testing.assert_array_equal(p[:, 0, 0], p0)
    # Check that we can reuse the parameters with scipy distributions
    cdf = lognorm.cdf(0.99, *p.values)
    assert cdf.shape == (self.nx, self.ny)
    assert p.attrs["estimator"] == "Maximum likelihood"
def aleform():
    form = Ale_form()
    if request.method == "POST" and form.validate_on_submit():
        # obesity
        weight = float(form.weight.data)
        height = float(form.height.data)
        bmi = float(weight / height**2)
        # BMI distribution
        percentilbmi = lognorm.cdf([bmi], 0.1955, -10, 25.71)
        # value in the obesity county distribution
        val_obse = lognorm.ppf([percentilbmi], 0.0099, -449.9, 474.25)
        # diabetes
        diabetes = float(form.diabetes.data)
        val_dia = lognorm.ppf([diabetes], 0.164, -7.143, 14.58)
        # smokers
        smoke = float(form.smoke.data)
        # number of cigarettes distribution
        percentilcigars = lognorm.cdf([smoke], 0.506, 0, 2.29)
        # value in the smoker county distribution
        val_smoke = lognorm.ppf([percentilcigars], 0.062, -65.19, 88.55)
        # exercise
        exercise = float(form.exercise.data)
        val_exer = lognorm.ppf([exercise], 0.105, -36.41, 62.65)
        # high-school diploma
        hsdiploma = float(form.hsdiploma.data)
        val_dip = lognorm.ppf([hsdiploma], 0.208, -11.3, 24.59)
        # poverty
        poverty = float(form.poverty.data)
        val_pov = lognorm.ppf([poverty], 0.279, -3.594, 15.76)
        out_person = [val_exer, val_obse, val_smoke, val_dia, val_pov, val_dip]
        # out_person = [35.41, 39, 42, 17, 33.7, 35.4]  # lowest
        # out_person = [8, 10, 7.9, 1.64, 3.0, 1.6]  # highest
        # out_person = [35, 15, 25.5, 30.5, 45.5, 45.5]  # example for building the web form
        x_predict = np.array(out_person).reshape(1, -1)
        result = model_predict.predict(x_predict)
        result = str(result)
        # return result
        return render_template('predict_ale.html', result=result)
        # return redirect(url_for('predict_ale', out_person=out_person))
    return render_template('longevityform.html', title='LONGEVITY', form=form)
def cdf(self, lumi):
    r"""
    Gives the value of the CDF at lumi.

    Args:
        lumi: float or array-like, point where CDF is evaluated.

    Notes:
        CDF given by:
        $$ \frac{1}{2} + \frac{1}{2}
           \mathrm{erf}\left(\frac{\ln(x)-\mu}{\sqrt{2}\,\sigma}\right) $$
    """
    return lognorm.cdf(lumi, s=self.sigma, scale=np.exp(self.mu))
def poe(self, iml):
    """
    Compute the Probability of Exceedance (PoE) for the given
    Intensity Measure Level (IML).
    """
    variance = self.stddev ** 2.0
    sigma = math.sqrt(math.log(
        (variance / self.mean ** 2.0) + 1.0))
    mu = self.mean ** 2.0 / math.sqrt(
        variance + self.mean ** 2.0)
    return lognorm.cdf(iml, sigma, scale=mu)
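# A standalone sanity check (hypothetical mean/stddev) of the moment-matching
# used in poe and _poe_continuous above: with sigma^2 = ln(1 + var/mean^2) and
# scale = mean^2 / sqrt(var + mean^2), the lognormal reproduces the requested
# mean and standard deviation.
import math
import numpy as np
from scipy.stats import lognorm

mean_m, stddev_m = 0.5, 0.2
var_m = stddev_m ** 2
sigma_m = math.sqrt(math.log(var_m / mean_m ** 2 + 1.0))
scale_m = mean_m ** 2 / math.sqrt(var_m + mean_m ** 2)
assert np.isclose(lognorm.mean(sigma_m, scale=scale_m), mean_m)
assert np.isclose(lognorm.std(sigma_m, scale=scale_m), stddev_m)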
def cdf(self, lumi):
    """
    Gives the value of the CDF at lumi.

    Parameters:
        lumi: float or array-like, point where CDF is evaluated.

    Notes:
        CDF given by:

        1     1       /   ln(x) - mu   \
        --- + --- erf |  -------------- |
        2     2       \  sqrt(2) sigma /
    """
    return lognorm.cdf(lumi, s=self.sigma, scale=np.exp(self.mu))
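# A hedged numerical check (not from the original class) that the docstring
# formula matches SciPy: lognorm.cdf(x, s=sigma, scale=exp(mu)) equals
# 1/2 + 1/2 * erf((ln(x) - mu) / (sqrt(2) * sigma)).
import numpy as np
from scipy.special import erf
from scipy.stats import lognorm

mu_c, sigma_c, x_c = 0.0, 0.8, 1.7   # hypothetical parameters
lhs = lognorm.cdf(x_c, s=sigma_c, scale=np.exp(mu_c))
rhs = 0.5 + 0.5 * erf((np.log(x_c) - mu_c) / (np.sqrt(2) * sigma_c))
assert np.isclose(lhs, rhs)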
def plotGlobalBeta(sizeEvent):
    from scipy.stats import beta
    from scipy.optimize import curve_fit
    import scipy
    sizeEvent = np.sort(sizeEvent[sizeEvent > 0])
    [xCCDF, yCCDF, PDF] = get_CCDF(sizeEvent)
    popt, pcov = curve_fit(betaDist, xCCDF, PDF)
    print(popt, pcov)
    hold(True)
    # plot(xCCDF, yCCDF, '.-', color=(73./256, 142./256, 204./256), linewidth=2, markersize=10)
    dist = getattr(scipy.stats, "beta")
    param = dist.fit(sizeEvent, loc=0, scale=1)
    pdf_fitted = dist.pdf(xCCDF, *param[:-2], loc=param[-2], scale=param[-1])
    plot(xCCDF, 1 - np.cumsum(pdf_fitted), '--', color='black', linewidth=2)
    print(param)
    # Theoretical_CDF = beta.cdf(xCCDF, alpha1, beta1)
    # Theoretical_CCDF = 1 - Theoretical_CDF
    # plot(xCCDF, Theoretical_CCDF, '--', color='black', linewidth=2)
    from scipy.stats import lognorm
    Tho2 = 1 - lognorm.cdf(xCCDF, 1, 0, 5)
    # slope1, intercept, r_value, p_value, std_err = linregress(np.log10(xCCDF), np.log10(yCCDF))
    # plot(np.log10(xCCDF), intercept + np.log10(xCCDF) * slope1, color='red')
    # plot(np.log10(xCCDF), np.log10(Tho2), color='red')
    # legend(['Data', ''.join(['Alpha = ', str(param[0]), ', Beta = ', str(param[1])])], prop={'size': 12}, loc=3)
    xlabel(r'Severity of attack')
    ylabel(r'$P(X>s)$')
    xlim([1, 1.05 * np.max(xCCDF)])
    ylim([np.min([yCCDF, 1 - np.cumsum(pdf_fitted)]) / 1.5,
          0.2 + np.max([yCCDF, 1 - np.cumsum(pdf_fitted)])])
    xscale('log')
    yscale('log')
def expectation_difference(params, size_dist):
    mean, sd = params
    expectation = []
    actual = []
    total = 0
    for size_band, n in size_dist.items():
        total += n
        if '-' in size_band:
            lower = int(size_band.split('-')[0])
            upper = int(size_band.split('-')[1]) + 1
        else:
            lower = int(size_band.split('+')[0])
            upper = np.inf
        expectation.append(
            lognorm.cdf(upper, sd, scale=np.exp(mean))
            - lognorm.cdf(lower, sd, scale=np.exp(mean)))
        actual.append(n)
    return ((total * np.array(expectation) - np.array(actual))**2).mean()
def cdf(x, w, gamma, mu, sigma):
    x = np.asarray(x)
    t = np.exp(mu)
    gamma = np.asarray(gamma)
    sigma = np.asarray(sigma)
    # mixture of shifted lognormals: one row of f per component
    f = lognorm.cdf(x, sigma[:, np.newaxis],
                    loc=gamma[:, np.newaxis],
                    scale=t[:, np.newaxis])
    # for j in range(m):
    #     f = f + w[j] * lognorm.cdf(x, sigma[j], loc=gamma[j], scale=t[j])
    #     # lognorm3p.cdf(x, gamma=gamma[j], mu=mu[j], sigma=sigma[j])
    return np.dot(w, f)
def portableDC(t, deltaT, pdist, mu=0.3, sigma=1.064):
    # unit: hours; may add recStartDelay to include multiple restore efforts
    stateP, timeP, recStartTimeP = pdist['portableDCPrev']
    if 'cont' in pdist and pdist['cont'] == 0:
        # if contS fails, it damages dc recovery
        if stateP == 0:  # didn't recover
            # (fails, fail time accumulates, recovery start time set to now)
            return (0, timeP + deltaT, t)
        else:  # was ok
            # (fails, fail time = 0, recovery start time set to now)
            return (0, 0, t)
    else:
        if stateP == 1:
            # succeeded at previous time: keep success; no recovery needed,
            # restore starting time unchanged
            return (stateP, timeP + deltaT, recStartTimeP)
        elif t + deltaT < recStartTimeP:
            # failed at previous time and t + deltaT < previous start time:
            # recovery fails, time passes, start time unchanged
            return (0, timeP + deltaT, recStartTimeP)
        else:
            # failed at previous time: sample whether recovery happens in
            # this step, with time measured from the start of the current
            # restore work
            psucc = (lognorm.cdf(t + deltaT - recStartTimeP, s=sigma, scale=np.exp(mu))
                     - lognorm.cdf(t - recStartTimeP, s=sigma, scale=np.exp(mu)))
            state = np.random.choice(2, p=[1 - psucc, psucc])
            if state == 1:  # success by sampling
                return (state, 0, recStartTimeP)
            else:  # failure time accumulates
                return (state, timeP + deltaT, recStartTimeP)
def getMu(xCCDF, yCCDF, sizeEvent, PDF):
    from scipy.stats import lognorm
    from scipy.special import erf
    bins = np.sort(sizeEvent)
    best_chi = 50000
    N = len(sizeEvent)
    n = len(xCCDF)
    # MLE estimators
    mu = 1. / N * np.sum(np.log(bins))
    sigma = np.sqrt(1. / N * np.sum((np.log(bins) - mu)**2))
    scale = np.exp(mu)
    shape = sigma
    print([shape, scale])
    bestb = 1
    bestc = 1
    bestd = 1
    loc = 0
    # [add, loc, bdd] = lognorm.fit(bins)
    d = 1
    """
    for b in np.linspace(0.5, 3, 101):
        for c in np.linspace(0.5, 3, 101):
            Theoretical_CDF = lognorm.cdf(xCCDF, b * shape, d * loc, c * scale)
            Theoretical_PDF = lognorm.pdf(xCCDF, b * shape, d * loc, c * scale)
            Theoretical_CCDF = 1 - Theoretical_CDF
            chi = np.sum((PDF - Theoretical_PDF * N)**2 / (Theoretical_PDF * N))
            if chi < best_chi:
                bestb = b
                bestc = c
                bestd = d
                best_chi = chi
    best_chi = stats.chi2.cdf(best_chi, n - 2)
    """
    Theoretical_CDF = lognorm.cdf(xCCDF, bestb * shape, bestd * loc, bestc * scale)
    Theoretical_CCDF = 1 - Theoretical_CDF
    print(bestb, bestc, bestd)
    return [(bestc * scale), bestb * shape, bestd * loc, 1 - best_chi,
            Theoretical_CCDF]
def compute_lognormal_cdf(mean_val, std_val, saving_folder):
    # Convert mean and std to lognormal parameters mu and sigma
    sigma = np.sqrt(np.log((std_val**2 / mean_val**2) + 1.0))
    mu = np.log(mean_val) - 0.5 * np.log((std_val**2 / mean_val**2) + 1.0)
    # Range of values for which the CDF is computed (the upper bound of 50
    # is arbitrary and should simply be high enough)
    x_range = np.linspace(0, 50, 1000)
    # Cumulative distribution function (and density)
    pdf = lognorm.pdf(x_range, s=sigma, scale=np.exp(mu))
    cdf = lognorm.cdf(x_range, s=sigma, scale=np.exp(mu))
    # Store x_range, cdf and pdf vectors together
    proba_mat = np.vstack([x_range, cdf, pdf]).T
    return proba_mat
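# A hedged equivalence note (assumed inputs): the mu computed above,
# log(mean) - 0.5*log(1 + std^2/mean^2), gives exp(mu) equal to the
# scale = mean^2 / sqrt(var + mean^2) used by the poe() methods earlier.
import numpy as np

mean_v, std_v = 3.0, 1.5
mu_v = np.log(mean_v) - 0.5 * np.log(std_v**2 / mean_v**2 + 1.0)
assert np.isclose(np.exp(mu_v), mean_v**2 / np.sqrt(std_v**2 + mean_v**2))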
def discretelognorm(xpoints, m, v):
    # numpy is used here in place of the deprecated scipy aliases (sp.log etc.)
    mu = np.log(m**2 / float(np.sqrt(v + m**2)))
    sigma = np.sqrt(np.log((v / float(m**2)) + 1))
    xmax = np.amax(xpoints)
    xmin = np.amin(xpoints)
    N = np.size(xpoints)
    xincr = (xmax - xmin) / float(N - 1)
    binnodes = np.arange(xmin + .5 * xincr, xmax + .5 * xincr, xincr)
    lnormcdf = lognorm.cdf(binnodes, sigma, 0, np.exp(mu))
    discrpdf = np.zeros((N, 1))
    for i in np.arange(N):
        if i == 0:
            discrpdf[i] = lnormcdf[i]
        elif (i > 0) and (i < N - 1):
            discrpdf[i] = lnormcdf[i] - lnormcdf[i - 1]
        elif i == N - 1:
            discrpdf[i] = discrpdf[i - 1]
    return discrpdf
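# A short usage sketch for discretelognorm above (grid and moments are
# hypothetical): it discretizes a lognormal with mean m and variance v onto
# the midpoints of the xpoints grid, so the returned probabilities should
# sum to roughly 1 when the grid covers most of the mass.
import numpy as np

xpoints = np.linspace(0.01, 10.0, 100)
probs = discretelognorm(xpoints, m=2.0, v=1.0)
print(probs.sum())  # close to 1 if the grid spans the distribution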
start = time.time()
fe = FirmEntry()
phi_init = np.ones(len(fe.grid_points))  # initial guess of the fixed point

# compute the fixed point
fixedpoint = fe.compute_fixed_point(T=fe.res_rule_operator, v=phi_init)
# recover the reservation cost from the fixed point
res_cost = fe.recover_res_rule(fixedpoint)

# calculate the perceived probability of investment p:
# p(mu, gam) = F(res_cost(mu, gam)), F: cdf of LN(mu_f, gam_f)
prob_invest = lognorm.cdf(res_cost, s=np.sqrt(fe.gam_f), scale=np.exp(fe.mu_f))

# reshape the reservation cost and the perceived prob. of investment
res_cost = np.reshape(res_cost, (fe.musize, fe.gamsize))
prob_invest = np.reshape(prob_invest, (fe.musize, fe.gamsize))

# === plot perceived probability of investment === #
# Plot the figure on the whole grid range
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
mu_meshgrid, gam_meshgrid = fe.x, fe.y
ax.plot_surface(mu_meshgrid, gam_meshgrid, prob_invest.T,
                rstride=2, cstride=3, cmap=cm.jet,
def distfit(n, dists, title, ra, dec, fwhm, dm):
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import lognorm
    from scipy import ndimage
    # n = 279
    bins = 165
    width = 22
    # fwhm = 2.0
    sig = ((bins / width) * fwhm) / 2.355
    valsLP = []
    for i in range(25000):
        random_ra = ra * np.random.random_sample((n,))
        random_dec = dec * np.random.random_sample((n,))
        grid_r, xedges_r, yedges_r = np.histogram2d(
            random_dec, random_ra, bins=[bins, bins],
            range=[[0, width], [0, width]])
        grid_gaus_r = ndimage.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r - grid_mean_r) / grid_sigma_r
        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),
                                              grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])
    # valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
    x = np.linspace(2, 22, 4000)
    hist_vals, edges = np.histogram(valsLP, bins=400, range=[2, 22], density=True)
    centers = (edges[:-1] + edges[1:]) / 2.
    al, loc, beta = lognorm.fit(valsLP)
    plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta), 'r-', lw=2, alpha=0.6,
             label='lognormal distribution')
    print('Significance of detection:',
          '{0:6.3f}%'.format(100.0 * lognorm.cdf(dists, al, loc=loc, scale=beta)))
    plt.scatter(centers, hist_vals, edgecolors='none',
                label='histogram of $\\sigma$ from 25000\nuniform random samples')
    ax = plt.subplot(111)
    plt.plot([dists, dists], [-1.0, 2.0], 'k--', lw=2, alpha=1.0,
             label='best ' + title + ' detection')
    plt.ylim(0, 1.1)
    plt.xlim(2, 12)
    plt.xlabel('$\\sigma$ above local mean')
    plt.ylabel('$P(\\sigma = X)$')
    plt.legend(loc='best', frameon=True)
    ax.set_aspect(3)
    # plt.show()
    plt.savefig(title + '_' + repr(dm) + '_' + repr(fwhm) + '_dist.pdf')
def lognorm_cdf(x, mean, std):
    # note: `mean` is passed as SciPy's scale parameter, i.e. the median exp(mu)
    dist_cdf = lognorm.cdf(x, std, 0, mean)
    return dist_cdf
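# A minimal usage sketch for the thin wrapper above (values are hypothetical):
# the positional arguments map to lognorm.cdf(x, s=std, loc=0, scale=mean).
from scipy.stats import lognorm

print(lognorm_cdf(2.0, mean=1.5, std=0.6))
print(lognorm.cdf(2.0, 0.6, 0, 1.5))  # the identical call spelled out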
def distance_metric(self, statistic='all', verbose=False,
                    plot_kwargs1={'color': 'b', 'marker': 'D', 'label': '1'},
                    plot_kwargs2={'color': 'g', 'marker': 'o', 'label': '2'},
                    save_name=None):
    '''
    Calculate the distance.

    *NOTE:* The data are standardized before comparing to ensure the
    distance is calculated on the same scales.

    Parameters
    ----------
    statistic : 'all', 'hellinger', 'ks', 'lognormal'
        Which measure of distance to use.
    verbose : bool, optional
        Enables plotting.
    plot_kwargs1 : dict, optional
        Pass kwargs to `~matplotlib.pyplot.plot` for `dataset1`.
    plot_kwargs2 : dict, optional
        Pass kwargs to `~matplotlib.pyplot.plot` for `dataset2`.
    save_name : str, optional
        Save the figure when a file name is given.
    '''
    if statistic == 'all':
        self.compute_hellinger_distance()
        self.compute_ks_distance()
        # self.compute_ad_distance()
        if self._do_fit:
            self.compute_lognormal_distance()
    elif statistic == 'hellinger':
        self.compute_hellinger_distance()
    elif statistic == 'ks':
        self.compute_ks_distance()
    elif statistic == 'lognormal':
        if not self._do_fit:
            raise Exception("Fitting must be enabled to compute the"
                            " lognormal distance.")
        self.compute_lognormal_distance()
    # elif statistic == 'ad':
    #     self.compute_ad_distance()
    else:
        raise TypeError("statistic must be 'all',"
                        "'hellinger', 'ks', or 'lognormal'.")
        # "'hellinger', 'ks' or 'ad'."
    if verbose:
        import matplotlib.pyplot as plt
        defaults1 = {'color': 'b', 'marker': 'D', 'label': '1'}
        defaults2 = {'color': 'g', 'marker': 'o', 'label': '2'}
        for key in defaults1:
            if key not in plot_kwargs1:
                plot_kwargs1[key] = defaults1[key]
        for key in defaults2:
            if key not in plot_kwargs2:
                plot_kwargs2[key] = defaults2[key]
        if self.normalization_type == "standardize":
            xlabel = r"z-score"
        elif self.normalization_type == "center":
            xlabel = r"$I - \bar{I}$"
        elif self.normalization_type == "normalize_by_mean":
            xlabel = r"$I/\bar{I}$"
        else:
            xlabel = r"Intensity"
        # Print fit summaries if using fitting
        if self._do_fit:
            try:
                print(self.PDF1._mle_fit.summary())
            except ValueError:
                warn("Covariance calculation failed. Check the fit quality"
                     " for data set 1!")
            try:
                print(self.PDF2._mle_fit.summary())
            except ValueError:
                warn("Covariance calculation failed. Check the fit quality"
                     " for data set 2!")
        # PDF
        plt.subplot(121)
        plt.semilogy(self.bin_centers, self.PDF1.pdf,
                     color=plot_kwargs1['color'], linestyle='none',
                     marker=plot_kwargs1['marker'],
                     label=plot_kwargs1['label'])
        plt.semilogy(self.bin_centers, self.PDF2.pdf,
                     color=plot_kwargs2['color'], linestyle='none',
                     marker=plot_kwargs2['marker'],
                     label=plot_kwargs2['label'])
        if self._do_fit:
            # Plot the fitted model.
            vals = np.linspace(self.bin_centers[0], self.bin_centers[-1], 1000)
            fit_params1 = self.PDF1.model_params
            plt.semilogy(vals,
                         lognorm.pdf(vals, *fit_params1[:-1],
                                     scale=fit_params1[-1], loc=0),
                         color=plot_kwargs1['color'], linestyle='-')
            fit_params2 = self.PDF2.model_params
            plt.semilogy(vals,
                         lognorm.pdf(vals, *fit_params2[:-1],
                                     scale=fit_params2[-1], loc=0),
                         color=plot_kwargs2['color'], linestyle='-')
        plt.grid(True)
        plt.xlabel(xlabel)
        plt.ylabel("PDF")
        plt.legend(frameon=True)
        # ECDF
        ax2 = plt.subplot(122)
        ax2.yaxis.tick_right()
        ax2.yaxis.set_label_position("right")
        if self.normalization_type is not None:
            ax2.plot(self.bin_centers, self.PDF1.ecdf,
                     color=plot_kwargs1['color'], linestyle='-',
                     marker=plot_kwargs1['marker'],
                     label=plot_kwargs1['label'])
            ax2.plot(self.bin_centers, self.PDF2.ecdf,
                     color=plot_kwargs2['color'], linestyle='-',
                     marker=plot_kwargs2['marker'],
                     label=plot_kwargs2['label'])
            if self._do_fit:
                ax2.plot(vals,
                         lognorm.cdf(vals, *fit_params1[:-1],
                                     scale=fit_params1[-1], loc=0),
                         color=plot_kwargs1['color'], linestyle='-')
                ax2.plot(vals,
                         lognorm.cdf(vals, *fit_params2[:-1],
                                     scale=fit_params2[-1], loc=0),
                         color=plot_kwargs2['color'], linestyle='-')
        else:
            ax2.semilogx(self.bin_centers, self.PDF1.ecdf,
                         color=plot_kwargs1['color'], linestyle='-',
                         marker=plot_kwargs1['marker'],
                         label=plot_kwargs1['label'])
            ax2.semilogx(self.bin_centers, self.PDF2.ecdf,
                         color=plot_kwargs2['color'], linestyle='-',
                         marker=plot_kwargs2['marker'],
                         label=plot_kwargs2['label'])
            if self._do_fit:
                ax2.semilogx(vals,
                             lognorm.cdf(vals, *fit_params1[:-1],
                                         scale=fit_params1[-1], loc=0),
                             color=plot_kwargs1['color'], linestyle='-')
                ax2.semilogx(vals,
                             lognorm.cdf(vals, *fit_params2[:-1],
                                         scale=fit_params2[-1], loc=0),
                             color=plot_kwargs2['color'], linestyle='-')
        plt.grid(True)
        plt.xlabel(xlabel)
        plt.ylabel("ECDF")
        plt.tight_layout()
        if save_name is not None:
            plt.savefig(save_name)
            plt.close()
        else:
            plt.show()
    return self
x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(), grid_gaus_r.shape)
sig_vals_r.append(S_r[x_cent_r][y_cent_r])

# valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
# vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)
bins, edges = np.histogram(sig_vals_r, bins=400, range=[2, 22], density=True)
centers = (edges[:-1] + edges[1:]) / 2.
x = np.linspace(2, 22, 4000)
dists = np.array([3.958, 3.685, 3.897, 3.317])
al, loc, beta = lognorm.fit(sig_vals_r)
print(al, loc, beta)
print(lognorm.cdf(dists, al, loc=loc, scale=beta))
plt.scatter(centers, bins, edgecolors='none',
            label='histogram of $\\sigma$ from 25000\nuniform random samples')
plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta), 'r-', lw=5, alpha=0.6,
         label='lognormal distribution')
ax = plt.subplot(111)
# plt.plot([3.958, 3.958], [-1.0, 2.0], 'k-', lw=5, alpha=1.0, label='best AGC198606 detection')
# plt.plot([10.733, 10.733], [-1.0, 2.0], 'k-', lw=5, alpha=0.5, label='Leo P detection at 1.74 Mpc')
q = np.ones(np.size(y)) / np.size(y)  # prior belief (if uniform: cross entropy = regular entropy)
dual = me.MaxEntDual(q, a, u, e)
res = minimize(dual.dual, np.zeros(len(u)), jac=dual.grad, method="BFGS")
pdf_y = dual.dist(res.x)

figure(figsize=[21, 5.5])

subplot(1, 3, 1)
plot(y, pdf_y / dy)
xlim(0, 8)
xlabel('$y$')
title(r'$\mathbb{E}[\log{(y)}] = 0, \; \mathbb{E}[\log^2{(y)}] = 1, \; y \in (0, 100)$')

subplot(1, 3, 2)
cdf_y = np.cumsum(pdf_y)
cdf_logn = lognorm.cdf(y, 1)
plot(cdf_y, cdf_logn, 'o')
xlabel('$F_Y(y)$')
ylabel('$F_{lognorm}(y)$')
title('Q-Q plot of ME distribution vs lognormal', fontsize=13)

subplot(1, 3, 3)
plot(np.log(y), dual.dist(res.x) * y / dy)
xlim(-6, 6)
xlabel(r'$\log(y)$')
title(r'$\mathbb{E}[\log{(y)}] = 0, \; \mathbb{E}[\log^2{(y)}] = 1, \; y \in (0, 100)$')

# 5
qn = norm.pdf(np.log(y))
qn = qn / np.sum(qn)
from scipy.stats import lognorm

print(lognorm.cdf(1, 0.5**2, 0, 1))
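# The one-liner above evaluates the lognormal CDF at x = 1 with shape
# s = 0.5**2 = 0.25 and scale = 1 (median 1), which is exactly 0.5; a hedged
# cross-check against the equivalent normal form:
import numpy as np
from scipy.stats import norm

assert np.isclose(lognorm.cdf(1, 0.5**2, 0, 1), norm.cdf(np.log(1) / 0.5**2))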
def __init__(self, a, b, n, name, pa=0.1, pb=0.9, lognormal=False, Plot=True):
    mscale.register_scale(ProbitScale)
    if Plot:
        fig = plt.figure(facecolor="white")
        ax1 = fig.add_subplot(121, axisbelow=True)
        ax2 = fig.add_subplot(122, axisbelow=True)
        ax1.set_xlabel(name)
        ax1.set_ylabel("ECDF and Best Fit CDF")
        prop = matplotlib.font_manager.FontProperties(size=8)
    if lognormal:
        sigma = (log(b) - log(a)) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
        mu = log(a) - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
        cdf = arange(0.001, 1.000, 0.001)
        ppf = [lognorm.ppf(v, sigma, scale=exp(mu)) for v in cdf]
        x = lognorm.rvs(sigma, scale=exp(mu), size=n)
        x.sort()
        print("generating lognormal %s, p50 %0.3f, size %s" % (name, exp(mu), n))
        x_s, ecdf_x = ecdf(x)
        best_fit = lognorm.cdf(x, sigma, scale=exp(mu))
        if Plot:
            ax1.set_xscale("log")
            ax2.set_xscale("log")
        hist_y = lognorm.pdf(x_s, std(log(x)), scale=exp(mu))
    else:
        sigma = (b - a) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
        mu = a - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
        cdf = arange(0.001, 1.000, 0.001)
        ppf = [norm.ppf(v, mu, scale=sigma) for v in cdf]
        print("generating normal %s, p50 %0.3f, size %s" % (name, mu, n))
        x = norm.rvs(mu, scale=sigma, size=n)
        x.sort()
        x_s, ecdf_x = ecdf(x)
        best_fit = norm.cdf((x - mean(x)) / std(x))
        hist_y = norm.pdf(x_s, loc=mean(x), scale=std(x))
    if Plot:
        ax1.plot(ppf, cdf, "r-", linewidth=2)
        ax1.set_yscale("probit")
        ax1.plot(x_s, ecdf_x, "o")
        ax1.plot(x, best_fit, "r--", linewidth=2)
        n, bins, patches = ax2.hist(x, density=True, facecolor="green", alpha=0.75)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        ax2.plot(x_s, hist_y, "r--", linewidth=2)
        ax2.set_xlabel(name)
        ax2.set_ylabel("Histogram and Best Fit PDF")
        ax1.grid(b=True, which="both", color="black", linestyle="-", linewidth=1)
        # ax1.grid(b=True, which='major', color='black', linestyle='--')
        ax2.grid(True)
    return
def fatality_fraction(x, y):
    erd = w.dose(x, y, dunits='mi', doseunits='Roentgen')
    if erd > 2000.0:
        return 1.01
    else:
        return lognorm.cdf(erd, 0.42, scale=450)
sigmaInit = 2 * sqrt(truncVarnce)
mu, sigma = findTruncNormalRoots(truncMean, truncVarnce, muInit, sigmaInit,
                                 minThreshRefl)
print("mu = %s" % mu)
print("sigma = %s" % sigma)

# Compute empirical distribution function of data
reflCompressedSorted = sort(reflCompressed)
reflEdf = (rankdata(reflCompressedSorted) - 1) / lenReflCompressed
normCdf = norm.cdf(reflCompressedSorted, loc=truncMean, scale=sqrt(truncVarnce))
truncNormCdf = (norm.cdf(reflCompressedSorted, mu, sigma)
                - norm.cdf((minRefl - mu) / sigma)) \
    / (1.0 - norm.cdf((minRefl - mu) / sigma))
minRefl = amin(reflCompressedSorted)
expMuLogN = (truncMean - minRefl) / sqrt(1 + truncVarnce / ((truncMean - minRefl)**2))
sigma2LogN = log(1 + truncVarnce / ((truncMean - minRefl)**2))
lognormCdf = lognorm.cdf(reflCompressedSorted - minRefl,
                         sqrt(sigma2LogN), loc=0, scale=expMuLogN)
# pdb.set_trace()
DnNormCdf = findKSDn(normCdf, reflEdf)
DnTruncNormCdf = findKSDn(truncNormCdf, reflEdf)
DnLognormCdf = findKSDn(lognormCdf, reflEdf)
print("KS statistic Dn")
print("DnNormCdf = %s" % DnNormCdf)
print("DnTruncNormCdf = %s" % DnTruncNormCdf)
print("DnLognormCdf = %s" % DnLognormCdf)
plt.clf()

# Plot cumulative distribution functions
# Empirical CDF