def trunclognormprior_pdf(data, mu, sigma):
    # lognormal pdf truncated to (0, 1); note `mu` is used as scipy's scale
    # parameter here, not as the log-mean
    from scipy.stats import lognorm
    epsilon = 1e-200
    term2 = (lognorm.pdf(data, sigma, scale=mu, loc=0.0) /
             (lognorm.cdf(1.0, sigma, scale=mu, loc=0.0) -
              lognorm.cdf(0.0, sigma, scale=mu, loc=0.0))) * (data < 1.0)

    return term2 + epsilon
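A minimal usage sketch (not from the original source; the grid and parameter values are illustrative): evaluate the truncated prior on (0, 1) and check that it integrates to roughly one.

import numpy as np
from scipy.stats import lognorm

data = np.linspace(0.01, 0.99, 99)
pdf_vals = trunclognormprior_pdf(data, mu=0.5, sigma=0.25)
# crude Riemann check that the truncated density integrates to ~1 on (0, 1)
print(pdf_vals.sum() * (data[1] - data[0]))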
Example #2
def calc_Nd_interval_NorESM(input_ds, fromNd, toNd, varNameN):
    varN = 'NCONC%02.0f' % (1)
    da_Nd = input_ds[varN] * 0.  # keep dimensions, zero value
    da_Nd.name = varNameN
    da_Nd.attrs['long_name'] = 'N$_{%.0f-%.0f}$' % (fromNd, toNd)
    varsNCONC = sized_varListNorESM['NCONC']
    varsNMR = sized_varListNorESM['NMR']  # the *2 radius -> diameter conversion happens on the values below
    varsSIG = sized_varListNorESM['SIGMA']
    for varN, varSIG, varNMR in zip(varsNCONC, varsSIG, varsNMR):
        NCONC = input_ds[varN].values  # *10**(-6) #m-3 --> cm-3
        SIGMA = input_ds[varSIG].values  # case[varSIG][lev]#*10**6
        NMR = input_ds[varNMR].values * 2  # radius --> diameter

        # nconc_ab_nlim[case][model]+=logR*NCONC*lognorm.pdf(logR, np.log(SIGMA),scale=NMR)
        if fromNd > 0:
            dummy = NCONC * (lognorm.cdf(toNd, np.log(SIGMA),
                                         scale=NMR)) - NCONC * (lognorm.cdf(
                                             fromNd, np.log(SIGMA), scale=NMR))
        else:
            dummy = NCONC * (lognorm.cdf(toNd, np.log(SIGMA), scale=NMR))
        # if NMR=0 --> nan values. We set these to zero:
        dummy[NMR == 0] = 0.
        dummy[NCONC == 0] = 0.
        dummy[np.isnan(NCONC)] = np.nan
        da_Nd += dummy
    return da_Nd
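For a single mode, the count in a diameter interval reduces to a CDF difference; a standalone sketch with made-up values (the units are assumptions, not from the source):

import numpy as np
from scipy.stats import lognorm

NCONC, SIGMA, NMR = 500.0, 1.6, 80.0   # number conc. (cm-3), geometric std dev, median diameter (nm)
fromNd, toNd = 50.0, 100.0
n_interval = NCONC * (lognorm.cdf(toNd, np.log(SIGMA), scale=NMR)
                      - lognorm.cdf(fromNd, np.log(SIGMA), scale=NMR))
print(n_interval)  # modal number concentration between 50 and 100 nm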
Example #3
def exotic_price2(x0, k, t, vol, b):
    s = vol * sqrt(t)
    mu = -s * s / 2
    scl = np.exp(mu)
    alph = (np.log(k / x0) - (mu + s * s)) / s
    beta = (np.log(b / x0) - (mu + s * s)) / s
    p1 = x0 * (norm.cdf(beta) - norm.cdf(alph))
    p2 = k * (lognorm.cdf(b / x0, s, scale=scl) -
              lognorm.cdf(k / x0, s, scale=scl))
    # print(p1, p2)
    return p1 - p2
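A sanity-check sketch (illustrative inputs): the lognorm.cdf terms above equal norm.cdf applied to the standardized log-moneyness, since lognorm.cdf(x, s, scale=exp(mu)) = norm.cdf((ln(x) - mu) / s).

import numpy as np
from scipy.stats import norm, lognorm

x0, k, t, vol = 100.0, 105.0, 0.5, 0.2
s = vol * np.sqrt(t)
mu = -s * s / 2
lhs = lognorm.cdf(k / x0, s, scale=np.exp(mu))
rhs = norm.cdf((np.log(k / x0) - mu) / s)
print(np.isclose(lhs, rhs))  # True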
def simHawkesOneDay(
    mu: float,
    alpha: float,
    beta: float,
    R0: np.ndarray,
    nrTrainingDays: int,
    day: int,
    cases: np.ndarray,
    config: EMConfig,
    threshold: float = 1e-5,
) -> np.ndarray:
    assert (cases.shape[0] >= nrTrainingDays
            ), "Fewer cases provided than training days"
    timestamps = nrTrainingDays + day - np.array(range(nrTrainingDays + day))
    if config.incubationDistribution == "weibull":
        intensity = weibull_min.cdf(timestamps + 0.5, c=2.453,
                                    scale=6.258) - weibull_min.cdf(
                                        timestamps - 0.5, c=2.453, scale=6.258)
        intensity[len(intensity) - 1] += weibull_min.cdf(0.5,
                                                         c=2.453,
                                                         scale=6.258)
    elif config.incubationDistribution == "gamma":
        intensity = gamma.cdf(timestamps + 0.5, a=5.807,
                              scale=0.948) - gamma.cdf(
                                  timestamps - 0.5, a=5.807, scale=0.948)
        intensity[len(intensity) - 1] += gamma.cdf(0.5, a=5.807, scale=0.948)
    elif config.incubationDistribution == "lognormal":
        sigma = 0.5
        mu = 1.63
        intensity = lognorm.cdf(
            timestamps + 0.5, s=sigma, scale=np.exp(mu)) - lognorm.cdf(
                timestamps - 0.5, s=sigma, scale=np.exp(mu))
        intensity[len(intensity) - 1] += lognorm.cdf(0.5,
                                                     scale=np.exp(mu),
                                                     s=sigma)
    elif config.incubationDistribution == "normal":
        intensity = norm.cdf(timestamps + 0.5, scale=alpha,
                             loc=beta) - norm.cdf(
                                 timestamps - 0.5, scale=alpha, loc=beta)
        intensity[len(intensity) - 1] += norm.cdf(0.5, scale=alpha, loc=beta)
    else:
        raise NotImplementedError
    intensity = intensity[intensity > threshold].reshape(-1, 1)
    kernelRange = list(
        range(nrTrainingDays + day - intensity.shape[0], nrTrainingDays + day))
    intensityDay = intensity * np.array(
        R0[kernelRange].T * cases[kernelRange]).reshape(-1, 1)
    intensityDay = np.round(np.sum(intensityDay) + mu)
    # TODO: why a Poisson draw here instead of just taking the expectation? Perhaps for a confidence interval.
    nrTriggeredCases = np.random.poisson(intensityDay)
    nrTriggeredCases = min(nrTriggeredCases, swissPopulation)
    return nrTriggeredCases
Example #5
 def kinetic_dispersion(self):
     k0_weights = np.zeros(self.simulation_options["dispersion_bins"])
     k_start = lognorm.ppf(0.0001, self.nd_param.k0_shape,
                           loc=self.nd_param.k0_loc, scale=self.nd_param.k0_scale)
     k_end = lognorm.ppf(0.9999, self.nd_param.k0_shape,
                         loc=self.nd_param.k0_loc, scale=self.nd_param.k0_scale)
     k0_vals = np.linspace(k_start, k_end,
                           self.simulation_options["dispersion_bins"])
     k0_weights[0] = lognorm.cdf(k0_vals[0], self.nd_param.k0_shape,
                                 loc=self.nd_param.k0_loc,
                                 scale=self.nd_param.k0_scale)
     for k in range(1, len(k0_weights)):
         k0_weights[k] = (lognorm.cdf(k0_vals[k], self.nd_param.k0_shape,
                                      loc=self.nd_param.k0_loc,
                                      scale=self.nd_param.k0_scale)
                          - lognorm.cdf(k0_vals[k - 1], self.nd_param.k0_shape,
                                        loc=self.nd_param.k0_loc,
                                        scale=self.nd_param.k0_scale))
     return k0_vals, k0_weights
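The same binning idea in a standalone, vectorized sketch (the shape/loc/scale values are illustrative, not the class's nd_param values):

import numpy as np
from scipy.stats import lognorm

shape, loc, scale, nbins = 0.5, 0.0, 1.0, 16
vals = np.linspace(lognorm.ppf(0.0001, shape, loc=loc, scale=scale),
                   lognorm.ppf(0.9999, shape, loc=loc, scale=scale), nbins)
# prepend=0.0 makes weights[0] = cdf(vals[0]), matching the loop above
weights = np.diff(lognorm.cdf(vals, shape, loc=loc, scale=scale), prepend=0.0)
print(weights.sum())  # ~0.9999, the mass up to the upper ppf cut point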
Example #6
def precomputeKernelPDF(alpha: float, beta: float, nrTrainingDays: int,
                        config: EMConfig) -> np.ndarray:
    kernelPDF = np.zeros((nrTrainingDays, nrTrainingDays))
    if config.incubationDistribution == "weibull":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = weibull_min.cdf(
                        i - j + 0.5, c=alpha, scale=beta) - weibull_min.cdf(
                            i - j - 1, c=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = weibull_min.cdf(
                        i - j + 0.5, c=alpha, scale=beta) - weibull_min.cdf(
                            i - j - 0.5, c=alpha, scale=beta)
    elif config.incubationDistribution == "gamma":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = gamma.cdf(
                        i - j + 0.5, a=alpha, scale=beta) - gamma.cdf(
                            i - j - 1, a=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = gamma.cdf(
                        i - j + 0.5, a=alpha, scale=beta) - gamma.cdf(
                            i - j - 0.5, a=alpha, scale=beta)
    elif config.incubationDistribution == "lognormal":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = lognorm.cdf(
                        i - j + 0.5, s=alpha, scale=beta) - lognorm.cdf(
                            i - j - 1, s=alpha, scale=beta)
                else:
                    kernelPDF[i, j] = lognorm.cdf(
                        i - j + 0.5, s=alpha, scale=beta) - lognorm.cdf(
                            i - j - 0.5, s=alpha, scale=beta)
    elif config.incubationDistribution == "normal":
        for i in range(nrTrainingDays):
            for j in range(i):
                if i - j == 1:
                    kernelPDF[i, j] = norm.cdf(
                        i - j + 0.5, scale=alpha, loc=beta) - norm.cdf(
                            i - j - 1, scale=alpha, loc=beta)
                else:
                    kernelPDF[i, j] = norm.cdf(
                        i - j + 0.5, scale=alpha, loc=beta) - norm.cdf(
                            i - j - 0.5, scale=alpha, loc=beta)
    else:
        raise NotImplementedError
    return kernelPDF
def expected_bands(mean, sd, size_bands):
    sizes = {}
    for size_band in size_bands:
        if '-' in size_band:
            upper = int(size_band.split('-')[1]) + 1
            lower = int(size_band.split('-')[0])
        else:
            upper = np.inf
            lower = int(size_band[:-1])

        sizes[size_band] = lognorm.cdf(upper, s=sd,
                                       scale=np.exp(mean)) - lognorm.cdf(
                                           lower, s=sd, scale=np.exp(mean))

    return sizes
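A hypothetical usage example (the band labels follow the '250+' convention the function expects; the mean and sd values are made up):

import numpy as np
from scipy.stats import lognorm

bands = ['0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+']
shares = expected_bands(mean=2.0, sd=1.1, size_bands=bands)
print(sum(shares.values()))  # 1.0: the bands partition (0, inf)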
def overtopfailure(overmu, oversigma, overheight):
    Htop = Htoe + overheight
    OVERFLOW = flow(Htop)  # Overtopping Flow
    overexpmu = math.exp(overmu)
    # NB: scale=exp(overmu) already encodes the log-mean; the extra loc=overmu
    # shifts the support, which is unusual for a lognormal fit
    FNOVER = lognorm.cdf(OVERFLOW, oversigma, loc=overmu, scale=overexpmu)
    FOVER = 1 - FNOVER
    return FOVER
Example #9
    def get_profit_probability(self, x_vector, y_vector, iv, s, r, t):
        '''
        Returns the probability of obtaining a profit with the strategy with
        in current scenario under study
        inputs:
            x_vector -> vector of underlying prices
            y_vector -> vector with Black-Scholes results
            iv -> underlying implied volatility
            s -> current underlying price
            r -> risk-free interest rate
            t -> time to expiration (in years)
        '''
        p_profit = 0
        # Calculate break-even points
        zero_crossings = np.where(np.diff(np.sign(y_vector)))[0]
        breakevens = [x_vector[i] for i in zero_crossings]
        if len(breakevens) > 2:
            print('ERROR: more than 2 zeroes detected')
        elif len(breakevens) == 0:
            p_profit = (0.9999 if y_vector[len(y_vector) // 2] > 0 else 0.0001)
        else:
            # Get probability of being below the min breakeven at expiration
            # REVIEW CDF can't return zero!
            scale = s * np.exp(r * t)
            p_below = lognorm.cdf(breakevens[0], iv, scale=scale)
            # Get probability of being above the max breakeven at expiration
            p_above = lognorm.sf(breakevens[1], iv, scale=scale)
            # Get the probability of profit for the calendar
            p_profit = 1 - p_above - p_below
            print('Profit prob. with s=' + str(s) + ', iv=' + str(iv) +
                  ', b/e=' + str(breakevens))
            print('1 - ' + str(p_below) + ' - ' + str(p_above) + ' = ' +
                  str(p_profit))  # TODO debugging purposes

        return p_profit
Example #10
def main(mean = 0.5, sd = 1.2):
	for x in np.linspace(1, 100000, num=16):
		max_sizes = [0.00001, 5, 10, 20, 50, 100, 250, 10**10]
		titles = ['0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+']

		binned_sample_exp = {titles[i]: lognorm.cdf(max_sizes[i + 1], sd, scale=np.exp(mean)) - lognorm.cdf(max_sizes[i], sd, scale=np.exp(mean)) for i in range(len(max_sizes) - 1)}
		binned_sample_gen = analysis.sort_sample(lognorm.rvs(sd, scale=np.exp(mean), size=int(x)))
		binned_sample_gen = {s: v / int(x) for s, v in binned_sample_gen.items()}
		print(binned_sample_gen, binned_sample_exp)
	with Pool() as p:
		data = p.starmap(simulation_one_parameter_set.parameter_expectation, [(int(x), mean, sd) for x in np.linspace(0, 100000, num=16)])

	mean_with = []
	sd_with = []
	mean_without = []
	sd_without = []

	for d in data:
		mean_with.append(d[0] - mean)
		sd_with.append(d[1] - sd)
		mean_without.append(d[2])
		sd_without.append(d[3])

	plt.plot(np.linspace(0, 100000, num=16), mean_with)
	plt.plot(np.linspace(0, 100000, num=16), sd_with)

	plt.show()
Example #11
File: test.py  Project: bjanesh/uchvc-tools
def distfit(n,dists,title,width,height,fwhm,dm,samples=1000):
    from scipy.stats import lognorm

    bins_h = int(height * 60. / 8.)
    bins_w = int(width * 60. / 8.)
    sig = ((bins_w/width)*fwhm)/2.355
    valsLP = []
    for i in range(samples) :
        random_ra = width*np.random.random_sample((n,))
        random_dec = height*np.random.random_sample((n,))
        random_xy = zip(random_ra,random_dec)
        grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins_h,bins_w], range=[[0,height],[0,width]])
        hist_points_r = zip(xedges_r,yedges_r)
        grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        S_r = np.array(grid_gaus_r*0)

        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r

        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])

    x = np.linspace(2, 22, 4000)

    bins, edges = np.histogram(valsLP, bins=400, range=[2,22], density=True)
    centers = (edges[:-1] + edges[1:])/2.

    al, loc, beta = lognorm.fit(valsLP)
    pct = 100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)
    print('Significance of detection:', '{0:6.3f}%'.format(pct))
Example #12
def getMu(xCCDF, yCCDF, sizeEvent, PDF):
    from scipy.stats import lognorm
    from scipy.special import erf
    bins = np.sort(sizeEvent)
    best_chi = 50000
    N = len(sizeEvent)
    n = len(xCCDF)

    #MLE estimators
    mu = 1. / N * np.sum(np.log(bins))
    sigma = np.sqrt(1. / N * np.sum((np.log(bins) - mu)**2))
    scale = np.exp(mu)
    shape = sigma
    print([shape, scale])
    bestb = 1
    bestc = 1
    bestd = 1
    loc = 0
    #[add,loc,bdd] = lognorm.fit(bins)

    d = 1

    Theoretical_CDF = lognorm.cdf(xCCDF, bestb * shape, bestd * loc,
                                  bestc * scale)

    Theoretical_CCDF = 1 - Theoretical_CDF
    print(bestb, bestc, bestd)

    return [(bestc * scale), bestb * shape, bestd * loc, 1 - best_chi,
            Theoretical_CCDF]
Example #13
File: damage.py  Project: wcarthur/gmma
def damage(wind_speed, mu, sigma, scale=1.0):
    """
    Calculate the damage level based on a given array of wind speed
    values and a given mu and sigma value. The mu and sigma control
    the form of the vulnerability function and are specific to
    each building class. This version uses the log-normal cumulative
    probability function to describe the damage level.

    Wind speed values are stored in metres/second, but the vulnerability
    relations are based on km/h, so we convert on the fly here.

    The `scale` value is used to reduce the total damage for a building
    type to provide an upper limit to damage (e.g. when the assumed damage
    is only to windows/cladding).
    """

    # mu is the scale parameter, sigma is the shape parameter
    # of the log-normal distribution:
    dmg = scale * lognorm.cdf(wind_speed * 3.6, sigma, scale=mu)
    # Mask 'small' damage values to be zero:
    np.putmask(dmg, dmg < EPSILON, 0.0)
    np.putmask(dmg, mu == 0.0, 0.0)

    return dmg
def lognorm_test2():

    price = 10
    rand_vars = norm(loc=1, scale=0.1).rvs(size=100)
    rand_prices = [price]

    for i in range(len(rand_vars)):
        rand_prices.append(rand_prices[i] * rand_vars[i])

    # rand_prices should now be roughly lognormal: multiplicative noise from
    # norm(loc=1, scale=0.1) applied to the starting price of 10

    rand_prices = np.asarray(rand_prices)
    print(rand_vars)
    print(rand_prices)

    fig, ax = plt.subplots()
    ax.hist(rand_prices)

    log_rand_prices = np.log(rand_prices)

    rand_prices_std = rand_prices.std()
    rand_prices_mean = rand_prices.mean()

    log_rand_prices_std = log_rand_prices.std()
    log_rand_prices_mean = log_rand_prices.mean()

    x = np.linspace(0, 100, 1000)

    # use norm mu & sigma (note: scipy's signature is lognorm.cdf(x, s, loc=0,
    # scale=1), so the bare third argument below is loc, not scale)
    lognorm_norm_cdf = lognorm.cdf(x, 0.1, 1)

    # use rand prices std & mean
    lognorm_rand_cdf = lognorm.cdf(x, rand_prices_std, rand_prices_mean)

    # ...
    lognorm_log_cdf = lognorm.cdf(x, log_rand_prices_std, log_rand_prices_mean)

    # use norm but with ln stuff?
    norm_trans_cdf = norm.cdf((np.exp(x) - 1) / 0.1)

    #ax.plot(x, lognorm_norm_cdf, label="Norm CDF")
    #ax.plot(x, lognorm_rand_cdf, label="Rand CDF")
    #ax.plot(x, lognorm_log_cdf, label="Log CDF")
    ax.plot(x, norm_trans_cdf, label="Norm trans CDF")
    plt.legend()
    plt.show()
def calc_risk_integral(RTGM, beta, SAs, Probs):
    from scipy.stats import norm, lognorm
    from numpy import array, arange, exp, log, trapz, interp, isinf, where
    from scipy import interpolate
    from misc_tools import extrap1d

    FRAGILITY_AT_RTGM = 0.10
    BETA = 0.6
    AFE4UHGM = -log(1 - 0.02) / 50  # annual exceedance frequency for 2% in 50 yrs (about 1/2475 per yr)
    TARGET_RISK = -log(1 - 0.01) / 50
    '''
    SAs = array([ 0.1613, 0.1979, 0.2336, 0.3385, 0.4577, 0.5954, 0.7418, 0.7905, 0.9669, 1.1697])
    Probs = array([0.02, 0.01375, 0.01, 0.00445, 0.0021, 0.001, 0.0005, 0.000404, 0.0002, 0.0001])
    '''
    # get uniform hazard at 1/2475
    idx = where(~isinf(Probs))[0]
    Probs = Probs[idx]
    SAs = SAs[idx]

    UHGM = exp((interp(log(AFE4UHGM), log(Probs[::-1]), log(SAs[::-1]))))

    # up sample hazard curve
    UPSAMPLING_FACTOR = 1.05
    SMALLEST_SA = min([min(SAs), UHGM / 20])
    LARGEST_SA = max([max(SAs), UHGM * 20])

    upSAs = exp(
        arange(log(SMALLEST_SA), log(LARGEST_SA), log(UPSAMPLING_FACTOR)))

    f_i = interpolate.interp1d(log(SAs), log(Probs))
    f_x = extrap1d(f_i)
    upProbs = exp(f_x(log(upSAs)))
    '''
    upSAs = SAs
    upProbs = Probs
    '''
    # get fragility curve
    FragilityCurve = {}
    FragilityCurve['Median'] = RTGM / exp(norm.ppf(FRAGILITY_AT_RTGM) * BETA)
    FragilityCurve['PDF'] = lognorm.pdf(upSAs,
                                        BETA,
                                        scale=(FragilityCurve['Median']))
    FragilityCurve['CDF'] = lognorm.cdf(upSAs,
                                        BETA,
                                        scale=(FragilityCurve['Median']))
    FragilityCurve['SAs'] = upSAs
    FragilityCurve['Beta'] = BETA

    # do risk integral
    Integrand = FragilityCurve['PDF'] * upProbs
    Risk = trapz(Integrand, upSAs)

    # calculate collapse probability
    CollapseProb = 1 - exp(-50 * Risk)

    RiskCoefficient = RTGM / UHGM

    return upProbs, upSAs, FragilityCurve, Integrand, CollapseProb
Example #16
def simple_price2(x0, k, t, vol):
    s = vol * sqrt(t)
    mu = -s * s / 2
    scl = np.exp(mu)
    alph = (np.log(k / x0) - (mu + s * s)) / s
    p1 = x0 * (1 - norm.cdf(alph))
    p2 = k * (1 - lognorm.cdf(k / x0, s, scale=scl))
    # print(p1, p2)
    return p1 - p2
Example #17
def _poe_continuous(fragility_function, iml):
    variance = fragility_function.stddev ** 2.0
    sigma = math.sqrt(math.log(
        (variance / fragility_function.mean ** 2.0) + 1.0))

    # NB: this is exp(mu), the lognormal median, used directly as scipy's scale
    mu = fragility_function.mean ** 2.0 / math.sqrt(
        variance + fragility_function.mean ** 2.0)

    return lognorm.cdf(iml, sigma, scale=mu)
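A quick moment-matching check (a sketch; the mean and stddev values are illustrative): with sigma and the median computed as above, scipy reproduces the requested mean and variance.

import math
import numpy as np
from scipy.stats import lognorm

mean, stddev = 0.5, 0.2
variance = stddev ** 2.0
sigma = math.sqrt(math.log(variance / mean ** 2.0 + 1.0))
median = mean ** 2.0 / math.sqrt(variance + mean ** 2.0)  # exp(mu)
m, v = lognorm.stats(sigma, scale=median, moments='mv')
print(np.isclose(m, mean), np.isclose(v, variance))  # True True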
Example #18
def calculate_robustness_index(results, enzymesInner, nsteps):
    '''
    Parameters
    results: dict
    enzymesInner: lst, enzyme IDs with initial and final reaction
    nsteps: int, number of integration steps
    enzymeLBs: ser, lower bounds of enzyme level
    enzymeUBs: ser, upper bounds of enzyme level

    Returns
    robustIdx: ser, mean of robustness index Si for each enzyme
    '''

    from scipy.stats import lognorm

    robustIdx = pd.Series(index=enzymesInner)
    for enzyme in robustIdx.index:

        Ss = []
        for i in range(len(results[enzyme])):

            resulti = results[enzyme][i]

            # feasible LB and UB of enzyme level
            Eref = resulti[0].loc[enzyme, resulti[0].columns[0]]

            LB = resulti[0].loc[enzyme, resulti[0].columns[-1]]
            UB = resulti[1].loc[enzyme, resulti[1].columns[-1]]

            # calculate the probability of maintaining stability
            p = lognorm.cdf(UB, s=0.5, scale=Eref) - lognorm.cdf(
                LB, s=0.5, scale=Eref)  # ln(E) ~ N(ln(Eref), 0.5**2)

            # calculate the robustness index
            if p <= 0: p = 0.0001

            S = -p * np.log(p)
            #S = p

            Ss.append(S)

        robustIdx.loc[enzyme] = np.mean(Ss)

    return robustIdx
Example #19
def estimate_bias(n, mean, sd, sample_size=100):
    print(n, mean, sd)
    mean_total = 0
    sd_total = 0
    fixed_mean_total = 0
    fixed_sd_total = 0
    for _ in range(sample_size):
        if n is not None:
            sample = lognorm.rvs(sd, scale=np.exp(mean), size=n)
            binned_sample = sort_sample(sample)
            binned_sample = {s: v / n for s, v in binned_sample.items()}

            #params = calculate_parameters.max_likelihood(binned_sample, sample.mean())
            params = calculate_parameters.max_likelihood(binned_sample)

        else:
            max_sizes = [0.00001, 5, 10, 20, 50, 100, 250, 10**10]
            titles = [
                '0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+'
            ]

            binned_sample = {
                titles[i]:
                lognorm.cdf(max_sizes[i + 1], sd, scale=np.exp(mean)) -
                lognorm.cdf(max_sizes[i], sd, scale=np.exp(mean))
                for i in range(len(max_sizes) - 1)
            }

            params = calculate_parameters.max_likelihood(
                binned_sample, np.exp(mean + sd**2 / 2))
            #print(params)

        if params is None:
            continue
        recovered_mean, recovered_sd = params

        mean_total += recovered_mean - mean
        sd_total += recovered_sd - sd

        #fixed_mean, fixed_sd = calculate_parameters.remove_bias(recovered_mean, recovered_sd)
        #fixed_mean_total += fixed_mean
        #fixed_sd_total += fixed_sd

    return mean_total / sample_size, sd_total / sample_size, fixed_mean_total / sample_size, fixed_sd_total / sample_size
def f_detect_outlier(sr_input_values, method='triple'):
    ## Outlier detection: triple standard deviation or leave-one-out
    ## Input:
    ##   raw value series (sr_input_values), method (method)
    ## Output:
    ##   DataFrame with the raw values (input_values) and an outlier flag (if_outlier)

    if method == 'triple':
        # triple std (triple)
        mu = np.mean(sr_input_values)
        sigma = np.std(sr_input_values)
        sr_if_outlier = (sr_input_values <
                         (mu - 3 * sigma)) | (sr_input_values >
                                              (mu + 3 * sigma))
        return pd.DataFrame(
            {
                'input_values': sr_input_values,
                'if_outlier': sr_if_outlier
            },
            columns=['input_values', 'if_outlier'])
    else:
        # leave one out (loo)
        len_loss = len(sr_input_values)
        arr_p_value = np.zeros(len_loss)
        for i in range(len_loss):
            mu1 = np.mean(
                np.log(sr_input_values.drop(sr_input_values.index[i])))
            sigma1 = np.std(
                np.log(sr_input_values.drop(sr_input_values.index[i])))
            cdf_i = lognorm.cdf(sr_input_values[i], s=sigma1, scale=np.exp(mu1))
            # two-sided tail probability under ln(x) ~ N(mu1, sigma1**2)
            arr_p_value[i] = 1 - cdf_i if cdf_i > 0.5 else cdf_i
        return pd.DataFrame(
            {
                'input_values': sr_input_values,
                'p_value': arr_p_value,
                'if_outlier': arr_p_value < 0.001
            },
            columns=['input_values', 'p_value', 'if_outlier'])
Example #21
def test_z(filename, uncorr_algo, distbn_to_fit):
    '''test case for pdz domain proteins'''
    algn = read_free(filename)
    sca_algn = sca(algn)
    algn_shape = get_algn_shape(algn)
    no_pos = algn_shape.no_pos
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa
    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    pdb_res_list = read_pdb(PDZ_PDB_FILE, 'A')
    msa_algn = msa_search(pdb_res_list, sca_algn.alignment)

    spect = spectral_decomp(sca_algn, 100, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect lbd_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev_rnd hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation

    n_min = min(no_seq, no_pos)
    Pi = dot(U[:, 0:n_min-1], transpose(V[:, 0:n_min-1]))
    U_p = dot(Pi, spect.pos_ev)

    distbn = get_distbn(distbn_to_fit)
    pd = distbn.fit(spect.pos_ev[:, 0], floc=0)
    # floc = 0 holds location to 0 for fitting
    print(pd)

    p_cutoff = 0.8  # cutoff for the cdf
    xhist = arange(0, 0.4, 0.01)
    x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist))/100)
    cdf = lognorm.cdf(x_dist, pd[0], pd[1], pd[2])
    # Use case : lognorm.cdf(x, shape, loc, scale)

    jnk = min(abs(cdf - p_cutoff))
    x_dist_pos_right = np.argmin(abs(cdf-p_cutoff))
    cutoff_ev = x_dist[x_dist_pos_right]
    sector_def = np.array(np.where(spect.pos_ev[:, 0] > cutoff_ev)[0])[0]
Example #22
    def test_fit(self):
        p = generic.fit(self.da, 'lognorm')

        assert p.dims[0] == 'dparams'
        assert p.get_axis_num('dparams') == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
Example #23
    def test_fit(self):
        p = generic.fit(self.da, "lognorm")

        assert p.dims[0] == "dparams"
        assert p.get_axis_num("dparams") == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(0.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
        assert p.attrs["estimator"] == "Maximum likelihood"
Example #24
def aleform():
    form = Ale_form()
    if request.method == "POST" and form.validate_on_submit():
        #obesity
        weight = float(form.weight.data)
        height = float(form.height.data)
        bmi = float(weight / height**2)
        #bmi distribution
        percentilbmi = lognorm.cdf([bmi], 0.1955, -10, 25.71)
        #value in the obesity county distr
        val_obse = lognorm.ppf([percentilbmi], 0.0099, -449.9, 474.25)
        #diabetes
        diabetes = float(form.diabetes.data)
        val_dia = lognorm.ppf([diabetes], 0.164, -7.143, 14.58)
        #smokers
        smoke = float(form.smoke.data)
        #number of cigarretes distribution
        percentilcigars = lognorm.cdf([smoke], 0.506, 0, 2.29)
        #value in the smoker county distribution
        val_smoke = lognorm.ppf([percentilcigars], 0.062, -65.19, 88.55)
        #exercise
        exercise = float(form.exercise.data)
        val_exer = lognorm.ppf([exercise], 0.105, -36.41, 62.65)
        #hsdiploma
        hsdiploma = float(form.hsdiploma.data)
        val_dip = lognorm.ppf([hsdiploma], 0.208, -11.3, 24.59)
        #poverty
        poverty = float(form.poverty.data)
        val_pov = lognorm.ppf([poverty], 0.279, -3.594, 15.76)
        out_person = [val_exer, val_obse, val_smoke, val_dia, val_pov, val_dip]
        # out_person=[35.41,39,42,17,33.7,35.4]   # lowest case
        # out_person=[8,10,7.9,1.64,3.0,1.6]      # highest case
        # out_person=[35,15,25.5,30.5,45.5,45.5]  # example used while building the web page
        x_predict = np.array(out_person).reshape(1, -1)
        result = model_predict.predict(x_predict)
        result = str(result)
        #return result
        return render_template('predict_ale.html', result=result)
    # return redirect(url_for('predict_ale',out_person=out_person))
    return render_template('longevityform.html', title='LONGEVITY', form=form)
Example #25
    def cdf(self, lumi):
        r""" Gives the value of the CDF at lumi.

        Args:
            lumi: float or array-like, point where CDF is evaluated.

        Notes:
            CDF given by:

            $$ \frac{1}{2} + \frac{1}{2} \times
            \mathrm{erf}\left( \frac{\ln(x)-\mu}{\sqrt{2}\sigma}\right)$$
        """
        return lognorm.cdf(lumi, s=self.sigma, scale=np.exp(self.mu))
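A sketch verifying the docstring formula against scipy (the mu, sigma, and lumi values are illustrative):

import numpy as np
from scipy.special import erf
from scipy.stats import lognorm

mu, sigma, lumi = 0.3, 0.8, 2.5
via_scipy = lognorm.cdf(lumi, s=sigma, scale=np.exp(mu))
via_erf = 0.5 + 0.5 * erf((np.log(lumi) - mu) / (np.sqrt(2) * sigma))
print(np.isclose(via_scipy, via_erf))  # True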
Example #26
File: input.py  Project: pslh/oq-risklib
    def poe(self, iml):
        """
        Compute the Probability of Exceedance (PoE) for the given
        Intensity Measure Level (IML).
        """
        variance = self.stddev ** 2.0
        sigma = math.sqrt(math.log(
            (variance / self.mean ** 2.0) + 1.0))

        # NB: this is exp(mu), the lognormal median, used directly as scipy's scale
        mu = self.mean ** 2.0 / math.sqrt(
            variance + self.mean ** 2.0)

        return lognorm.cdf(iml, sigma, scale=mu)
Example #27
    def cdf(self, lumi):
        """ Gives the value of the CDF at lumi.

        Parameters:
            lumi: float or array-like, point where CDF is evaluated.

        Notes:
            CDF given by:
             1     1       /   ln(x) - mu    \
            --- + --- erf |  ---------------  |
             2     2       \  sqrt(2) sigma  /
        """
        return lognorm.cdf(lumi, s=self.sigma, scale=np.exp(self.mu))
Example #28
def plotGlobalBeta(sizeEvent):
    from scipy.stats import beta
    from scipy.optimize import curve_fit
    import scipy

    sizeEvent = np.sort(sizeEvent[sizeEvent > 0])
    [xCCDF, yCCDF, PDF] = get_CCDF(sizeEvent)

    popt, pcov = curve_fit(betaDist, xCCDF, PDF)
    print(popt, pcov)

    #plot((xCCDF),(yCCDF),'.-',color=(73./256, 142./256, 204./256),linewidth=2,markersize=10)

    dist = getattr(scipy.stats, "beta")
    param = dist.fit(sizeEvent, loc=0, scale=1)

    pdf_fitted = dist.pdf(xCCDF, *param[:-2], loc=param[-2], scale=param[-1])
    plot(xCCDF, 1 - np.cumsum(pdf_fitted), '--', color='black', linewidth=2)

    print(param)

    ####Theoretical_CDF = beta.cdf(xCCDF,alpha1,beta1)
    ###Theoretical_CCDF = 1- Theoretical_CDF
    ###plot((xCCDF),(Theoretical_CCDF),'--',color='black',linewidth=2)

    from scipy.stats import lognorm
    Tho2 = 1 - lognorm.cdf(xCCDF, 1, 0, 5)
    #slope1, intercept, r_value, p_value, std_err = linregress(np.log10(xCCDF),np.log10(yCCDF))
    #plot(np.log10(xCCDF),intercept+np.log10(xCCDF)*slope1,color='red')
    #plot(np.log10(xCCDF),np.log10(Tho2),color='red')
    #legend(['Data',''.join(['Alpha = ', str(param[0]), ', Beta = ', str(param[1])])],prop={'size':12},loc=3)

    xlabel(r'Severity of attack')
    ylabel(r'$P(X>s)$')
    xlim([1, 1.05 * np.max(xCCDF)])
    ylim([np.min([yCCDF, 1 - np.cumsum(pdf_fitted)]) / 1.5,
          0.2 + np.max([yCCDF, 1 - np.cumsum(pdf_fitted)])])
    xscale('log')
    yscale('log')
Example #29
def expectation_difference(params, size_dist):
    mean, sd = params

    expectation = []
    actual = []
    total = 0

    for size_band, n in size_dist.items():
        total += n
        if '-' in size_band:
            lower = int(size_band.split('-')[0])
            upper = int(size_band.split('-')[1]) + 1
        else:
            lower = int(size_band.split('+')[0])
            upper = np.inf

        expectation.append(
            lognorm.cdf(upper, sd, scale=np.exp(mean)) -
            lognorm.cdf(lower, sd, scale=np.exp(mean)))

        actual.append(n)

    return ((total * np.array(expectation) - np.array(actual))**2).mean()
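A hypothetical usage sketch: recover (mean, sd) of log(size) from binned counts by minimizing this objective (the counts below are made up, and the optimizer is assumed to stay in the sd > 0 region):

import numpy as np
from scipy.optimize import minimize
from scipy.stats import lognorm

size_dist = {'0-4': 60, '5-9': 25, '10-19': 10, '20+': 5}
res = minimize(expectation_difference, x0=[1.5, 1.0], args=(size_dist,),
               method='Nelder-Mead')
print(res.x)  # fitted (mean, sd)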
Example #30
def cdf(x, w, gamma, mu, sigma):
    x = np.asarray(x)
    t = np.exp(mu)
    gamma = np.asarray(gamma)
    sigma = np.asarray(sigma)
    # component CDFs, shape (n_components, len(x)); the weighted sum over
    # components gives the mixture CDF (vectorized form of a per-component loop)
    f = lognorm.cdf(x,
                    sigma[:, np.newaxis],
                    loc=gamma[:, np.newaxis],
                    scale=t[:, np.newaxis])
    return np.dot(w, f)
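A hypothetical usage sketch: a three-component lognormal mixture evaluated on a grid (the weights and parameters are made up):

import numpy as np
from scipy.stats import lognorm

x = np.linspace(0.1, 10.0, 50)
w = np.array([0.5, 0.3, 0.2])       # mixture weights, sum to 1
gamma = np.array([0.0, 0.0, 0.0])   # per-component location shifts
mu = np.array([0.0, 0.5, 1.0])      # per-component log-means
sigma = np.array([0.4, 0.6, 0.8])   # per-component log-std devs
mix = cdf(x, w, gamma, mu, sigma)   # shape (50,)
print(mix[-1])  # approaches 1 in the upper tail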
Example #31
def portableDC(
    t,
    deltaT,
    pdist,
    mu=0.3,
    sigma=1.064
):  # unit hour, may add recStartDelay to include multiple restore efforts
    stateP, timeP, recStartTimeP = pdist['portableDCPrev']
    if 'cont' in pdist and pdist['cont'] == 0:  # if contS fails, it damages dc recovery
        if stateP == 0:  # didn't recover
            return (
                0, timeP + deltaT, t
            )  # (fails, fail time accumulates, recovery start time set to now)
        else:  # was ok
            return (0, 0, t
                    )  # (fails, fail time = 0, recovery start time set to now)

    else:
        if stateP == 1:  # succ at previous time, keep success
            return (
                stateP, timeP + deltaT, recStartTimeP
            )  # if it's still ok, no need to recovery. Restore starting time is current.
        elif t + deltaT < recStartTimeP:  # fail at previous time, and t + deltaT < previous start time, recover fail, time passes, startT no change
            return (0, timeP + deltaT, recStartTimeP)
        else:  # fail at previous time, sample to see whether recovery at this time step
            psucc = lognorm.cdf(
                t + deltaT - recStartTimeP, s=sigma, scale=np.exp(mu)
            ) - lognorm.cdf(
                t - recStartTimeP, s=sigma, scale=np.exp(mu)
            )  # correct time based on starting time of current resotre work
            state = np.random.choice(2, p=[1 - psucc, psucc])
            if state == 1:  # succ by sample
                return (state, 0, recStartTimeP)
            else:  # failure time accumulated
                return (state, timeP + deltaT, recStartTimeP)
Example #32
def getMu(xCCDF,yCCDF,sizeEvent,PDF):
    from scipy.stats import lognorm
    from scipy.special import erf
    bins = np.sort(sizeEvent)
    best_chi = 50000
    N = len(sizeEvent)
    n = len(xCCDF)

    #MLE estimators
    mu = 1./N*np.sum(np.log(bins))
    sigma = np.sqrt(1./N*np.sum((np.log(bins)-mu)**2))
    scale = np.exp(mu)
    shape = sigma
    print([shape, scale])
    bestb = 1
    bestc = 1
    bestd = 1
    loc = 0
    #[add,loc,bdd] = lognorm.fit(bins)

    d = 1
    """
    for b in np.linspace(0.5,3,101):
        for c in np.linspace(0.5,3,101):
            Theoretical_CDF = lognorm.cdf(xCCDF,b*shape,d*loc,c*scale)
            Theoretical_PDF = lognorm.pdf(xCCDF,b*shape,d*loc,c*scale)
            Theoretical_CCDF = 1 - Theoretical_CDF

            chi  = np.sum((PDF-Theoretical_PDF*N)**2/(Theoretical_PDF*N))

            if chi < best_chi:
                bestb = b
                bestc = c
                bestd = d
                best_chi = chi

    best_chi =  stats.chi2.cdf(best_chi,n-2)
    """
    Theoretical_CDF = lognorm.cdf(xCCDF, bestb * shape, bestd * loc, bestc * scale)

    Theoretical_CCDF = 1 - Theoretical_CDF

    print(bestb, bestc, bestd)

    return [(bestc * scale), bestb * shape, bestd * loc, 1 - best_chi,
            Theoretical_CCDF]
Example #33
def compute_lognormal_cdf(mean_val, std_val, saving_folder):

    # Converting mean and std to lognormal parameters mu and sigma
    sigma = np.sqrt(np.log((std_val**2 / mean_val**2) + 1.0))
    mu = np.log(mean_val) - 0.5 * np.log((std_val**2 / mean_val**2) + 1.0)

    # range of values for which the CDF is computed (the upper bound of 50 is
    # arbitrary and should be high enough to cover the distribution)
    x_range = np.linspace(0, 50, 1000)

    # probability density and cumulative distribution functions
    pdf = lognorm.pdf(x_range, s=sigma, scale=np.exp(mu))
    cdf = lognorm.cdf(x_range, s=sigma, scale=np.exp(mu))

    # Storing cdf, pdf and x_range vectors together
    proba_mat = np.vstack([x_range, cdf, pdf]).T

    return proba_mat
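A sketch checking the mean/std-to-(mu, sigma) conversion above via scipy's moments (the values are illustrative):

import numpy as np
from scipy.stats import lognorm

mean_val, std_val = 5.0, 2.0
sigma = np.sqrt(np.log((std_val**2 / mean_val**2) + 1.0))
mu = np.log(mean_val) - 0.5 * np.log((std_val**2 / mean_val**2) + 1.0)
m, v = lognorm.stats(sigma, scale=np.exp(mu), moments='mv')
print(np.isclose(m, mean_val), np.isclose(np.sqrt(v), std_val))  # True True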
Example #34
def discretelognorm(xpoints, m, v):
    # mean m and variance v of the lognormal are converted to the underlying
    # normal parameters mu and sigma; numpy replaces the removed scipy aliases
    # (sp.log, sp.sqrt, ...) used in the original
    import numpy as np

    mu = np.log(m**2 / float(np.sqrt(v + m**2)))
    sigma = np.sqrt(np.log((v / float(m**2)) + 1))

    xmax = np.amax(xpoints)
    xmin = np.amin(xpoints)
    N = np.size(xpoints)
    xincr = (xmax - xmin) / float(N - 1)

    binnodes = np.arange(xmin + .5 * xincr, xmax + .5 * xincr, xincr)
    lnormcdf = lognorm.cdf(binnodes, sigma, 0, np.exp(mu))
    discrpdf = np.zeros((N, 1))
    for i in range(N):
        if i == 0:
            discrpdf[i] = lnormcdf[i]
        elif (i > 0) and (i < N - 1):
            discrpdf[i] = lnormcdf[i] - lnormcdf[i - 1]
        elif i == N - 1:
            discrpdf[i] = discrpdf[i - 1]

    return discrpdf
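A hypothetical usage sketch: discretize a lognormal with mean 10 and variance 4 over 50 grid points; the discrete pdf should sum to roughly 1 (the last bin copies its neighbor by construction).

import numpy as np
from scipy.stats import lognorm

xpoints = np.linspace(0.1, 30.0, 50)
probs = discretelognorm(xpoints, 10.0, 4.0)
print(probs.sum())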

start = time.time()
		
fe = FirmEntry()
phi_init = np.ones(len(fe.grid_points)) # initial guess of the fixed point

# compute the fixed point
fixedpoint = fe.compute_fixed_point(T=fe.res_rule_operator, v=phi_init)

# recover the reservation cost from the fixed point 
res_cost = fe.recover_res_rule(fixedpoint)

# calculate the perceived probability of investment p
# p(mu,gam) = F(res_cost(mu,gam)), F: cdf of LN(mu_f, gam_f)
prob_invest = lognorm.cdf(res_cost, s=np.sqrt(fe.gam_f), 
					      scale=np.exp(fe.mu_f))

# reshape the reservation cost and the perceived prob. of investment
res_cost = np.reshape(res_cost, (fe.musize, fe.gamsize))
prob_invest = np.reshape(prob_invest, (fe.musize, fe.gamsize))


# === plot perceived probability of investment === #
"""
# Plot the figure on the whole grid range

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
mu_meshgrid, gam_meshgrid = fe.x, fe.y
ax.plot_surface(mu_meshgrid, gam_meshgrid, prob_invest.T,
				rstride=2, cstride=3, cmap=cm.jet,
Example #36
def distfit(n,dists,title,ra,dec,fwhm, dm):
	import numpy as np
	import matplotlib.pyplot as plt
	# from scipy.optimize import curve_fit
	from scipy.stats import lognorm
	from scipy import ndimage
	
	# n = 279
	bins = 165
	width = 22 
	# fwhm = 2.0
	sig = ((bins/width)*fwhm)/2.355
	valsLP = []
	for i in range(25000) :
		random_ra = ra*np.random.random_sample((n,))
		random_dec = dec*np.random.random_sample((n,))
		random_xy = zip(random_ra,random_dec)
		grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]])
		hist_points_r = zip(xedges_r,yedges_r)
		grid_gaus_r = ndimage.filters.gaussian_filter(grid_r, sig, mode='constant', cval=0)
		S_r = np.array(grid_gaus_r*0)
		
		grid_mean_r = np.mean(grid_gaus_r)
		grid_sigma_r = np.std(grid_gaus_r)
		S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r
		
		x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
		valsLP.append(S_r[x_cent_r][y_cent_r])
	# valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
	# vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)
	
	# bins, edges = np.histogram(vals, bins=400, range=[2,22], normed=True)
	# centers = (edges[:-1] + edges[1:])/2.
	# plt.scatter(centers, bins, edgecolors='none')
	
	x = np.linspace(2, 22, 4000)
	
	# al,loc,beta=lognorm.fit(vals)
	# print al, loc, beta
	# # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606')
	# print lognorm.cdf(dists, al, loc=loc, scale=beta)
	
	bins, edges = np.histogram(valsLP, bins=400, range=[2,22], density=True)
	centers = (edges[:-1] + edges[1:])/2.
	
	
	# x = np.linspace(2, 22, 4000)
	# dists = np.array([3.958,3.685,3.897,3.317])
	al,loc,beta=lognorm.fit(valsLP)
	# print al, loc, beta
	plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=2, alpha=0.6, label='lognormal distribution')
	print('Significance of detection:', '{0:6.3f}%'.format(100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)))
	
	plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples')
	# print chisqg(bins, lognorm.pdf(centers, al, loc=loc, scale=beta))
	
	
	ax = plt.subplot(111)
	plt.plot([dists,dists],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='best '+title+' detection') 
	# plt.plot([4.115,4.115],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='Leo P detection at 1.74 Mpc')
	# plt.plot([3.897,3.897],[-1.0,2.0],'k-', lw=5, alpha=0.6, label='d=417 kpc')
	# plt.plot([3.317,3.317],[-1.0,2.0],'k-', lw=5, alpha=0.4, label='d=427 kpc')
	plt.ylim(0,1.1)
	plt.xlim(2,12)
	plt.xlabel('$\sigma$ above local mean')
	plt.ylabel('$P(\sigma = X)$')
	plt.legend(loc='best', frameon=True)
	ax.set_aspect(3)
	# plt.show()
	plt.savefig(title+'_'+repr(dm)+'_'+repr(fwhm)+'_dist.pdf')
Example #37
def lognorm_cdf(x, mean, std):
  # positional args are (x, s, loc, scale): s=std, loc=0, scale=mean;
  # note scipy's scale is exp(mu), the median, not the arithmetic mean
  dist_cdf = lognorm.cdf(x, std, 0, mean)
  return dist_cdf
Example #38
    def distance_metric(self, statistic='all', verbose=False,
                        plot_kwargs1={'color': 'b', 'marker': 'D',
                                      'label': '1'},
                        plot_kwargs2={'color': 'g', 'marker': 'o',
                                      'label': '2'},
                        save_name=None):
        '''
        Calculate the distance.
        *NOTE:* The data are standardized before comparing to ensure the
        distance is calculated on the same scales.

        Parameters
        ----------
        statistic : 'all', 'hellinger', 'ks', 'lognormal'
            Which measure of distance to use.
        verbose : bool, optional
            Enables plotting.
        plot_kwargs1 : dict, optional
            Pass kwargs to `~matplotlib.pyplot.plot` for
            `dataset1`.
        plot_kwargs2 : dict, optional
            Pass kwargs to `~matplotlib.pyplot.plot` for
            `dataset2`.
        save_name : str,optional
            Save the figure when a file name is given.
        '''

        if statistic == 'all':
            self.compute_hellinger_distance()
            self.compute_ks_distance()
            # self.compute_ad_distance()
            if self._do_fit:
                self.compute_lognormal_distance()
        elif statistic == 'hellinger':
            self.compute_hellinger_distance()
        elif statistic == 'ks':
            self.compute_ks_distance()
        elif statistic == 'lognormal':
            if not self._do_fit:
                raise Exception("Fitting must be enabled to compute the"
                                " lognormal distance.")
            self.compute_lognormal_distance()
        # elif statistic == 'ad':
        #     self.compute_ad_distance()
        else:
            raise TypeError("statistic must be 'all', "
                            "'hellinger', 'ks', or 'lognormal'.")

        if verbose:

            import matplotlib.pyplot as plt

            defaults1 = {'color': 'b', 'marker': 'D', 'label': '1'}
            defaults2 = {'color': 'g', 'marker': 'o', 'label': '2'}

            for key in defaults1:
                if key not in plot_kwargs1:
                    plot_kwargs1[key] = defaults1[key]
            for key in defaults2:
                if key not in plot_kwargs2:
                    plot_kwargs2[key] = defaults2[key]

            if self.normalization_type == "standardize":
                xlabel = r"z-score"
            elif self.normalization_type == "center":
                xlabel = r"$I - \bar{I}$"
            elif self.normalization_type == "normalize_by_mean":
                xlabel = r"$I/\bar{I}$"
            else:
                xlabel = r"Intensity"

            # Print fit summaries if using fitting
            if self._do_fit:
                try:
                    print(self.PDF1._mle_fit.summary())
                except ValueError:
                    warn("Covariance calculation failed. Check the fit quality"
                         " for data set 1!")
                try:
                    print(self.PDF2._mle_fit.summary())
                except ValueError:
                    warn("Covariance calculation failed. Check the fit quality"
                         " for data set 2!")

            # PDF
            plt.subplot(121)
            plt.semilogy(self.bin_centers, self.PDF1.pdf,
                         color=plot_kwargs1['color'], linestyle='none',
                         marker=plot_kwargs1['marker'],
                         label=plot_kwargs1['label'])
            plt.semilogy(self.bin_centers, self.PDF2.pdf,
                         color=plot_kwargs2['color'], linestyle='none',
                         marker=plot_kwargs2['marker'],
                         label=plot_kwargs2['label'])
            if self._do_fit:
                # Plot the fitted model.
                vals = np.linspace(self.bin_centers[0], self.bin_centers[-1],
                                   1000)

                fit_params1 = self.PDF1.model_params
                plt.semilogy(vals,
                             lognorm.pdf(vals, *fit_params1[:-1],
                                         scale=fit_params1[-1],
                                         loc=0),
                             color=plot_kwargs1['color'], linestyle='-')

                fit_params2 = self.PDF2.model_params
                plt.semilogy(vals,
                             lognorm.pdf(vals, *fit_params2[:-1],
                                         scale=fit_params2[-1],
                                         loc=0),
                             color=plot_kwargs2['color'], linestyle='-')

            plt.grid(True)
            plt.xlabel(xlabel)
            plt.ylabel("PDF")
            plt.legend(frameon=True)

            # ECDF
            ax2 = plt.subplot(122)
            ax2.yaxis.tick_right()
            ax2.yaxis.set_label_position("right")
            if self.normalization_type is not None:
                ax2.plot(self.bin_centers, self.PDF1.ecdf,
                         color=plot_kwargs1['color'], linestyle='-',
                         marker=plot_kwargs1['marker'],
                         label=plot_kwargs1['label'])

                ax2.plot(self.bin_centers, self.PDF2.ecdf,
                         color=plot_kwargs2['color'], linestyle='-',
                         marker=plot_kwargs2['marker'],
                         label=plot_kwargs2['label'])

                if self._do_fit:
                    ax2.plot(vals,
                             lognorm.cdf(vals,
                                         *fit_params1[:-1],
                                         scale=fit_params1[-1],
                                         loc=0),
                             color=plot_kwargs1['color'], linestyle='-',)

                    ax2.plot(vals,
                             lognorm.cdf(vals,
                                         *fit_params2[:-1],
                                         scale=fit_params2[-1],
                                         loc=0),
                             color=plot_kwargs2['color'], linestyle='-',)

            else:
                ax2.semilogx(self.bin_centers, self.PDF1.ecdf,
                             color=plot_kwargs1['color'], linestyle='-',
                             marker=plot_kwargs1['marker'],
                             label=plot_kwargs1['label'])

                ax2.semilogx(self.bin_centers, self.PDF2.ecdf,
                             color=plot_kwargs2['color'], linestyle='-',
                             marker=plot_kwargs2['marker'],
                             label=plot_kwargs2['label'])

                if self._do_fit:
                    ax2.semilogx(vals,
                                 lognorm.cdf(vals, *fit_params1[:-1],
                                             scale=fit_params1[-1],
                                             loc=0),
                                 color=plot_kwargs1['color'], linestyle='-',)

                    ax2.semilogx(vals,
                                 lognorm.cdf(vals, *fit_params2[:-1],
                                             scale=fit_params2[-1],
                                             loc=0),
                                 color=plot_kwargs2['color'], linestyle='-',)

            plt.grid(True)
            plt.xlabel(xlabel)
            plt.ylabel("ECDF")

            plt.tight_layout()

            if save_name is not None:
                plt.savefig(save_name)
                plt.close()
            else:
                plt.show()

        return self
Example #39
	x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
	sig_vals_r.append(S_r[x_cent_r][y_cent_r])
# valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
# vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)

bins, edges = np.histogram(sig_vals_r, bins=400, range=[2,22], density=True)
centers = (edges[:-1] + edges[1:])/2.
# plt.scatter(centers, bins, edgecolors='none')

x = np.linspace(2, 22, 4000)
dists = np.array([3.958,3.685,3.897,3.317])
al, loc, beta = lognorm.fit(sig_vals_r)
print(al, loc, beta)
# plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606')
print(lognorm.cdf(dists, al, loc=loc, scale=beta))

bins, edges = np.histogram(sig_vals_r, bins=400, range=[2,22], density=True)
centers = (edges[:-1] + edges[1:])/2.
plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples')

# x = np.linspace(2, 22, 4000)
# dists = np.array([3.958,3.685,3.897,3.317])
# al,loc,beta=lognorm.fit(valsLP)
# print al, loc, beta
plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal distribution')
print(lognorm.cdf(dists, al, loc=loc, scale=beta))

ax = plt.subplot(111)
# plt.plot([3.958,3.958],[-1.0,2.0],'k-', lw=5, alpha=1.0, label='best AGC198606 detection') 
# plt.plot([10.733,10.733],[-1.0,2.0],'k-', lw=5, alpha=0.5, label='Leo P detection at 1.74 Mpc')
Example #40
q = np.ones(np.size(y))/np.size(y) # prior belief (if uniform: cross entropy = regular entropy)
dual = me.MaxEntDual(q, a, u, e)

res = minimize(dual.dual, np.zeros(len(u)), jac=dual.grad, method="BFGS")
pdf_y = dual.dist(res.x);

figure(figsize=[21, 5.5])
subplot(1, 3, 1)
plot(y, pdf_y/dy);
xlim(0, 8)
xlabel('$y$')
title('$\mathbb{E}[\log{(y)}] = 0, \; \mathbb{E}[\log^2{(y)}] = 1,  \; y \in (0, 100) $');

subplot(1, 3, 2)
cdf_y = np.cumsum(pdf_y)
cdf_logn = lognorm.cdf(y,1)
plot(cdf_y, cdf_logn,'o')
xlabel('$F_Y(y)$')
ylabel('$F_{lognorm}(y)$')
title('Q-Q plot of ME distribution vs lognormal',fontsize=13)

subplot(1, 3, 3)
plot(np.log(y), dual.dist(res.x)*y/dy)
xlim(-6, 6)
xlabel('$\log(y)$')
title('$\mathbb{E}[\log{(y)}] = 0, \; \mathbb{E}[\log^2{(y)}] = 1,  \; y \in (0, 100) $');

# 5
qn = norm.pdf(np.log(y))
qn = qn/np.sum(qn)
Example #41
from scipy.stats import lognorm
print(lognorm.cdf(1, 0.5**2, 0, 1))  # positional args: (x, s, loc, scale)
Example #42
    def __init__(self, a, b, n, name, pa=0.1, pb=0.9, lognormal=False, Plot=True):

        mscale.register_scale(ProbitScale)

        if Plot:
            fig = plt.figure(facecolor="white")
            ax1 = fig.add_subplot(121, axisbelow=True)
            ax2 = fig.add_subplot(122, axisbelow=True)
            ax1.set_xlabel(name)
            ax1.set_ylabel("ECDF and Best Fit CDF")
            prop = matplotlib.font_manager.FontProperties(size=8)

        if lognormal:

            sigma = (log(b) - log(a)) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
            mu = log(a) - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
            cdf = arange(0.001, 1.000, 0.001)
            # a list comprehension replaces map(), which is a lazy iterator in Python 3
            ppf = [lognorm.ppf(v, sigma, scale=exp(mu)) for v in cdf]

            x = lognorm.rvs(sigma, scale=exp(mu), size=n)
            x.sort()

            print("generating lognormal %s, p50 %0.3f, size %s" % (name, exp(mu), n))
            x_s, ecdf_x = ecdf(x)

            best_fit = lognorm.cdf(x, sigma, scale=exp(mu))
            if Plot:
                ax1.set_xscale("log")
                ax2.set_xscale("log")
            hist_y = lognorm.pdf(x_s, std(log(x)), scale=exp(mu))

        else:

            sigma = (b - a) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
            mu = a - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
            cdf = arange(0.001, 1.000, 0.001)
            ppf = [norm.ppf(v, mu, scale=sigma) for v in cdf]

            print("generating normal %s, p50 %0.3f, size %s" % (name, mu, n))
            x = norm.rvs(mu, scale=sigma, size=n)
            x.sort()
            x_s, ecdf_x = ecdf(x)
            best_fit = norm.cdf((x - mean(x)) / std(x))
            hist_y = norm.pdf(x_s, loc=mean(x), scale=std(x))

        if Plot:
            ax1.plot(ppf, cdf, "r-", linewidth=2)
            ax1.set_yscale("probit")
            ax1.plot(x_s, ecdf_x, "o")

            ax1.plot(x, best_fit, "r--", linewidth=2)

            n, bins, patches = ax2.hist(x, density=True, facecolor="green", alpha=0.75)
            bincenters = 0.5 * (bins[1:] + bins[:-1])
            ax2.plot(x_s, hist_y, "r--", linewidth=2)
            ax2.set_xlabel(name)
            ax2.set_ylabel("Histogram and Best Fit PDF")
            ax1.grid(visible=True, which="both", color="black", linestyle="-", linewidth=1)
            # ax1.grid(b=True, which='major', color='black', linestyle='--')
            ax2.grid(True)

        return
Example #43
def fatality_fraction(x, y):
    erd = w.dose(x, y, dunits='mi', doseunits='Roentgen')
    if erd > 2000.0:
        return 1.01
    else:
        return lognorm.cdf(erd, 0.42, scale=450)
Example #44
sigmaInit = 2*sqrt(truncVarnce)
mu, sigma = findTruncNormalRoots(truncMean,truncVarnce,muInit,sigmaInit,minThreshRefl)
print "mu = %s" %mu
print "sigma = %s" %sigma

# Compute empirical distribution function of data
reflCompressedSorted = sort(reflCompressed)
reflEdf = (rankdata(reflCompressedSorted) - 1)/lenReflCompressed
normCdf = norm.cdf( reflCompressedSorted, loc=truncMean, scale=sqrt(truncVarnce) )
truncNormCdf = ( norm.cdf(reflCompressedSorted,mu,sigma) \
                                      - norm.cdf((minRefl-mu)/sigma) ) \
                          /(1.0-norm.cdf((minRefl-mu)/sigma))
minRefl = amin(reflCompressedSorted)
expMuLogN = (truncMean-minRefl)/sqrt(1+truncVarnce/((truncMean-minRefl)**2))
sigma2LogN = log(1+truncVarnce/((truncMean-minRefl)**2))
lognormCdf = lognorm.cdf( reflCompressedSorted - minRefl, sqrt(sigma2LogN),
                      loc=0, scale=expMuLogN )

#pdb.set_trace()

DnNormCdf = findKSDn(normCdf, reflEdf)
DnTruncNormCdf = findKSDn(truncNormCdf, reflEdf)
DnLognormCdf = findKSDn(lognormCdf, reflEdf)
print "KS statistic Dn"
print "DnNormCdf = %s" %DnNormCdf
print "DnTruncNormCdf = %s" %DnTruncNormCdf
print "DnLognormCdf = %s" %DnLognormCdf

plt.clf()

# Plot cumulative distribution functions
# Empirical CDF