def dPDF(pts, mu, sigma, distribuition, outlier=0, data=0, n=10, seed=None):
    """
    Return ``pts`` abscissas obtained by inverting the normalised cumulative
    sum of a first-derivative weight curve.

    A weight curve ``y`` is built over ``x`` (analytically through
    ``distAnalyze.dpdf``, or from a smoothed and differenced histogram of
    random draws), accumulated with ``cumsum``, scaled so that its maximum
    is 1, and inverted by linear interpolation at ``pts`` evenly spaced
    levels in ``[eps, 1 - eps]``.

    Parameters
    ----------
    pts: int
        Number of points to return.
    mu, sigma: float
        Distribution parameters forwarded to scipy / numpy.random.
    distribuition: str
        'normal' or 'lognormal'.
    outlier: float, optional
        Amount by which the analytic grid is widened (both sides for
        'normal', upper side only for 'lognormal'). Not used when ``data``
        is non-zero.
    data: int, optional
        If non-zero, the number of random samples to draw instead of using
        the analytic curve.
    n: int, optional
        Forwarded to ``distAnalyze.mediaMovel`` (presumably a moving-average
        window - confirm against that helper).
    seed: optional
        A NumPy legacy random state as accepted by ``np.random.set_state``.
        ``None`` leaves the global random state untouched.

    Returns
    -------
    X, Y: ndarray
        ``Y`` holds the evenly spaced levels and ``X`` the interpolated
        abscissas.

    Raises
    ------
    ValueError
        If ``distribuition`` is not 'normal' or 'lognormal'.
    """
    import numpy as np
    from scipy.interpolate import interp1d
    from distAnalyze import dpdf, mediaMovel
    from scipy.stats import norm, lognorm

    eps = 5e-5
    ngrid = int(1e6)

    if distribuition == 'normal':
        outlier_inf = outlier_sup = outlier
    elif distribuition == 'lognormal':
        outlier_inf = 0
        outlier_sup = outlier
    else:
        raise ValueError("distribuition must be 'normal' or 'lognormal', "
                         "got %r" % (distribuition,))

    if not data:
        if distribuition == 'normal':
            inf, sup = norm.interval(0.9999, loc=mu, scale=sigma)
        else:
            inf, sup = lognorm.interval(0.9999, sigma, loc=0,
                                        scale=np.exp(mu))
        x = np.linspace(inf - outlier_inf, sup + outlier_sup, ngrid)
        y = dpdf(x, mu, sigma, distribuition)
    else:
        # set_state(None) raises, so only restore a state that was supplied.
        if seed is not None:
            np.random.set_state(seed)
        if distribuition == 'normal':
            d = np.random.normal(mu, sigma, data)
        else:
            d = np.random.lognormal(mu, sigma, data)

        # `normed` was removed from np.histogram; `density` is the
        # replacement and is identical for the equal-width 'fd' bins.
        y, x = np.histogram(d, bins='fd', density=True)
        # Bin edges -> bin centres.
        x = np.mean(np.array([x[:-1], x[1:]]), 0)

        # Absolute finite difference of the smoothed histogram, located
        # half a bin to the right of each left-hand centre.
        y = abs(np.diff(mediaMovel(y, n)))
        x = x[:-1] + np.diff(x)[0] / 2

        if distribuition == 'lognormal':
            y = y / (np.diff(x)[0] * sum(y))

    cdf = np.cumsum(y)
    cdf = cdf / max(cdf)

    # Invert the cumulative curve: level -> abscissa.
    interp = interp1d(cdf, x, fill_value='extrapolate')
    Y = np.linspace(eps, 1 - eps, pts)
    X = interp(Y)

    return X, Y
def logs(sigma=1, mu=0, area=0.9999):
    """
    Plot a lognormal PDF and mark its mode, mean and median.

    Parameters
    ----------
    sigma: float, optional
        Shape parameter passed to ``scipy.stats.lognorm``. Default is 1.
    mu: float, optional
        ``exp(mu)`` is used as the ``scale`` of the distribution.
        Default is 0.
    area: float, optional
        Confidence level passed to ``lognorm.interval``; its end-points
        bound the plotted x range. Default is 0.9999.

    Returns
    -------
    None
        The curve is drawn on a new matplotlib figure; ``plt.show()`` is
        not called here.
    """
    from scipy.stats import lognorm
    import numpy as np
    import matplotlib.pyplot as plt
    from numpy import exp

    shape = sigma
    scale = median = exp(mu)
    mode = exp(mu - shape**2)
    mean = exp(mu + (shape**2 / 2))

    a, b = lognorm.interval(area, shape, loc=0, scale=scale)
    x = np.linspace(a, b, 1000000)
    pdf = lognorm.pdf(x, shape, loc=0, scale=scale)

    plt.figure(figsize=(12, 8), dpi=200)
    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    plt.plot(x, pdf, label=r'PDF ($\sigma = %.2f$)' % shape)
    plt.vlines(mode, 0, pdf.max(), linestyle=':', label='Mode = %.2f' % mode)
    plt.vlines(mean,
               0,
               lognorm.pdf(mean, shape, loc=0, scale=scale),
               color='green',
               linestyle='--',
               label='Mean = %.2f' % mean)
    plt.vlines(median,
               0,
               lognorm.pdf(median, shape, loc=0, scale=scale),
               color='blue',
               label='Median = %.2f' % median)

    # A second plt.legend() call replaces the first, so both options have
    # to be given in the same call to take effect.
    plt.legend(loc=1, prop={'size': 18})
    plt.xlabel('x', fontsize=45)
    plt.ylabel('Probability', fontsize=45)
    plt.xticks(size=18)
    plt.yticks(size=18)
    plt.tight_layout()
# Example #3
# 0
def plot_prob_density(mu, la, predsData, testData, xmin, xmax):
    """
    Plot a lognormal density with its central 95 % and 68 % intervals, then
    print summary statistics and classification scores.

    Parameters
    ----------
    mu: float
        ``exp(mu)`` is used as the ``scale`` of ``scipy.stats.lognorm``.
    la: float
        Shape parameter ``s`` of ``scipy.stats.lognorm``.
    predsData: array-like
        Predicted labels; summed for the report and scored against
        ``testData``.
    testData: array-like with a ``.sum()`` method
        Actual labels; their sum is drawn as a vertical line.
    xmin: float
        Accepted for interface compatibility; not used (the x axis always
        starts at 0).
    xmax: float
        Upper limit of the plotted x range.

    Returns
    -------
    None
        Output is the figure and the printed report.

    Notes
    -----
    Relies on ``np`` and ``plt`` being available at module level.
    """
    from scipy.stats import lognorm

    fig, axes = plt.subplots(1, 1, figsize=(5, 4), sharey=True, dpi=120)
    font = "Times New Roman"
    scale = np.exp(mu)

    def f3(x):
        # Lognormal density 1 / (x * la * sqrt(2*pi)) * exp(...). The
        # previous hand-written lambda multiplied by la * sqrt(2*pi)
        # instead of dividing, and produced NaN at x = 0.
        return lognorm.pdf(x, s=la, scale=scale)

    x2 = np.linspace(0, xmax, 300)

    axes.plot(x2, f3(x2))
    _, ymax = axes.get_ylim()

    # The confidence level is passed positionally: its keyword was renamed
    # from `alpha` to `confidence`, and `alpha` no longer exists in recent
    # SciPy releases.
    x_bounds = lognorm.interval(0.95, s=la, scale=scale)
    x_bounds_std = lognorm.interval(0.68, s=la, scale=scale)

    actual_total = testData.sum()
    axes.axvline(x=actual_total, color='red', linestyle=':')

    axes.axvline(x=x_bounds[0], color='blue', alpha=0.3, linestyle=':')
    axes.axvline(x=x_bounds[1], color='blue', alpha=0.3, linestyle=':')

    xfill = np.linspace(x_bounds[0], x_bounds[1], 100)
    xfill_std = np.linspace(x_bounds_std[0], x_bounds_std[1], 100)

    # The 68 % band is drawn over the 95 % band, so it appears darker.
    axes.fill_between(xfill, f3(xfill), alpha=0.1, color='blue')
    axes.fill_between(xfill_std, f3(xfill_std), alpha=0.1, color='blue')

    axes.text(x=actual_total + 1,
              y=.03 * ymax,
              s='Actual: ' + str(int(actual_total)),
              color='red')
    axes.set_xlabel('Number of days exceeding threshold',
                    fontname=font,
                    fontweight="heavy",
                    fontsize=12)
    axes.set_ylabel('Probability density function (-)',
                    fontname=font,
                    fontweight="heavy",
                    fontsize=12)
    axes.set_ylim(0, ymax)
    axes.set_xlim(0, xmax)

    for label in axes.get_xticklabels() + axes.get_yticklabels():
        label.set_fontname(font)
    fig.show()

    spread = str(round(lognorm.std(s=la, scale=scale), 1))
    print('**********************************')
    print('Expected number of days exceeding thermal comfort criteria: ' +
          str(round(lognorm.mean(s=la, scale=scale), 1)) + ' +/- ' + spread)
    print('Most likely number of days exceeding thermal comfort criteria: ' +
          str(round(np.exp(mu - la**2))) + ' +/- ' + spread)
    print(
        'Predicted number of days exceeding thermal comfort criteria (deterministic): '
        + str(int(np.sum(predsData))))
    print('Actual number of days exceeding thermal comfort criteria: ' +
          str(int(actual_total)))
    print('**********************************')

    from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
    # scikit-learn metrics take (y_true, y_pred); with the arguments swapped
    # precision and recall are reported under each other's name.
    acc_score = accuracy_score(testData, predsData)
    prec_score = precision_score(testData, predsData)
    rec_score = recall_score(testData, predsData)
    auc_score = roc_auc_score(testData, predsData)
    print("Test Accuracy score: ", acc_score)
    print("Test Precision score: ", prec_score)
    print("Test Recall score: ", rec_score)
    print("Test ROC AUC score: ", auc_score)
# Example #4
# 0
def ddPDF(pts, mu, sigma, distribuition, outlier=0, data=0, n=10, seed=None):
    """
    Return ``pts`` abscissas obtained by inverting the normalised cumulative
    sum of a second-derivative weight curve.

    A weight curve ``y`` is built over ``x`` (analytically through
    ``distAnalyze.ddpdf``, or as the absolute second difference of the
    density estimate returned by ``someFunctions.ash`` for random draws),
    accumulated with ``cumsum``, scaled so that its maximum is 1, and
    inverted by linear interpolation at ``pts`` evenly spaced levels in
    ``[eps, 1 - eps]``.

    Parameters
    ----------
    pts: int
        Number of points to return.
    mu, sigma: float
        Distribution parameters forwarded to scipy / numpy.random.
    distribuition: str
        'normal' or 'lognormal'.
    outlier: float, optional
        Amount by which the analytic grid is widened (both sides for
        'normal', upper side only for 'lognormal'). Not used when ``data``
        is non-zero.
    data: int, optional
        If non-zero, the number of random samples to draw instead of using
        the analytic curve.
    n: int, optional
        Currently unused; kept for signature compatibility.
    seed: optional
        A NumPy legacy random state as accepted by ``np.random.set_state``.
        ``None`` leaves the global random state untouched.

    Returns
    -------
    X, Y: ndarray
        ``Y`` holds the evenly spaced levels and ``X`` the interpolated
        abscissas.

    Raises
    ------
    ValueError
        If ``distribuition`` is not 'normal' or 'lognormal'.
    """
    import numpy as np
    from scipy.interpolate import interp1d
    from distAnalyze import ddpdf, mediaMovel
    from scipy.stats import norm, lognorm
    from someFunctions import ash
    eps = 5e-5
    ngrid = int(1e6)

    if distribuition == 'normal':
        outlier_inf = outlier_sup = outlier
        if not data:
            inf, sup = norm.interval(0.9999, loc=mu, scale=sigma)
            x = np.linspace(inf - outlier_inf, sup + outlier_sup, ngrid)
            y = ddpdf(x, mu, sigma, distribuition)
        else:
            # set_state(None) raises, so only restore a supplied state.
            if seed is not None:
                np.random.set_state(seed)
            d = np.random.normal(mu, sigma, data)

            x, y = ash(d)
            # Absolute second difference, re-centred by one grid step.
            y = abs(np.diff(y, 2))
            x = x[:-2] + np.diff(x)[0]
            y = y / (np.diff(x)[0] * sum(y))

    elif distribuition == 'lognormal':
        outlier_inf = 0
        outlier_sup = outlier
        inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=np.exp(mu))
        # Lower bound: the quantile whose probability equals the density
        # value at `sup`.
        inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu))
        inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu))
        if not data:
            x = np.linspace(inf - outlier_inf, sup + outlier_sup, ngrid)
            y = ddpdf(x, mu, sigma, distribuition)
        else:
            if seed is not None:
                np.random.set_state(seed)
            d = np.random.lognormal(mu, sigma, data)

            x, y = ash(d)

            # Discard the estimate beyond the analytic upper bound.
            keep = x < sup
            y = y[keep]
            x = x[keep]

            y = abs(np.diff(y, 2))
            x = x[:-2] + np.diff(x)[0]
            y = y / (np.diff(x)[0] * sum(y))

    else:
        raise ValueError("distribuition must be 'normal' or 'lognormal', "
                         "got %r" % (distribuition,))

    cdf = np.cumsum(y)
    cdf = cdf / max(cdf)

    # Invert the cumulative curve: level -> abscissa.
    interp = interp1d(cdf, x, fill_value='extrapolate')
    Y = np.linspace(eps, 1 - eps, pts)
    X = interp(Y)

    return X, Y
# Example #5
# 0
def PDF(pts, mu, sigma, distribuition, outlier=0, data=0, seed=None):
    """
    Return abscissas at which the density takes evenly spaced values.

    ``pts // 2 + 1`` density levels are spaced linearly between the tail
    and the peak. The rising side of the density is inverted at those
    levels, the falling side at the same levels in reverse order, and both
    halves are concatenated (the peak is kept once).

    Parameters
    ----------
    pts: int
        Controls the number of levels; ``2 * (pts // 2) + 1`` points are
        returned.
    mu, sigma: float
        Distribution parameters forwarded to scipy / numpy.random.
    distribuition: str
        'normal' or 'lognormal'.
    outlier: float, optional
        Amount by which the analytic range is widened (both sides for
        'normal', upper side only for 'lognormal'). Not used when ``data``
        is non-zero.
    data: int, optional
        If non-zero, the number of random samples to draw; the density is
        then estimated with ``someFunctions.ash`` instead of analytically.
    seed: optional
        A NumPy legacy random state as accepted by ``np.random.set_state``.
        ``None`` leaves the global random state untouched.

    Returns
    -------
    X, Y: ndarray
        Abscissas and the matching density levels.

    Raises
    ------
    ValueError
        If ``distribuition`` is not 'normal' or 'lognormal'.
    """
    from scipy.stats import norm, lognorm
    import numpy as np
    from scipy.interpolate import interp1d
    from someFunctions import ash
    eps = 5e-5

    if distribuition == 'normal':
        if not data:
            inf, sup = norm.interval(0.9999, loc=mu, scale=sigma)

            # Rising side: lower bound -> mean.
            X1 = np.linspace(inf - outlier, mu, int(1e6))
            Y1 = norm.pdf(X1, loc=mu, scale=sigma)
            interp = interp1d(Y1, X1)
            y1 = np.linspace(Y1[0], Y1[-1], pts // 2 + 1)
            x1 = interp(y1)

            # Falling side: mean -> upper bound, same levels reversed.
            X2 = np.linspace(mu, sup + outlier, int(1e6))
            Y2 = norm.pdf(X2, loc=mu, scale=sigma)
            interp = interp1d(Y2, X2)
            y2 = np.flip(y1, 0)
            x2 = interp(y2)

        else:
            # set_state(None) raises, so only restore a supplied state.
            if seed is not None:
                np.random.set_state(seed)
            d = np.random.normal(mu, sigma, data)
            # ash() is used below (lognormal branch) with x and y of equal
            # length, so x is not averaged into mid-points here: doing so
            # (a leftover from the np.histogram version) would leave xest
            # one element shorter than yest and break interp1d.
            xest, yest = ash(d)
            M = int(np.argmax(yest))
            m = int(np.argmin(yest))
            interpL = interp1d(yest[:M + 1],
                               xest[:M + 1],
                               assume_sorted=False,
                               fill_value='extrapolate')
            interpH = interp1d(yest[M:],
                               xest[M:],
                               assume_sorted=False,
                               fill_value='extrapolate')

            y1 = np.linspace(yest[m] + eps, yest[M], pts // 2 + 1)
            x1 = interpL(y1)

            y2 = np.flip(y1, 0)
            x2 = interpH(y2)

    elif distribuition == 'lognormal':
        outlier_inf = 0
        outlier_sup = outlier
        inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=np.exp(mu))
        # Lower bound: the quantile whose probability equals the density
        # value at `sup`.
        inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu))
        inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu))
        if not data:
            mode = np.exp(mu - sigma**2)

            # Rising side: lower bound -> mode.
            X1 = np.linspace(inf - outlier_inf, mode, int(1e6))
            Y1 = lognorm.pdf(X1, sigma, loc=0, scale=np.exp(mu))
            interp = interp1d(Y1, X1)
            y1 = np.linspace(Y1[0], Y1[-1], pts // 2 + 1)
            x1 = interp(y1)

            # Falling side: mode -> upper bound, same levels reversed.
            X2 = np.linspace(mode, sup + outlier_sup, int(1e6))
            Y2 = lognorm.pdf(X2, sigma, loc=0, scale=np.exp(mu))
            interp = interp1d(Y2, X2)
            y2 = np.flip(y1, 0)
            x2 = interp(y2)
        else:
            if seed is not None:
                np.random.set_state(seed)
            d = np.random.lognormal(mu, sigma, data)
            xest, yest = ash(d)
            # Discard the estimate beyond the analytic upper bound.
            keep = xest < sup
            yest = yest[keep]
            xest = xest[keep]
            M = int(np.argmax(yest))
            m = int(np.argmin(yest))
            interpL = interp1d(yest[:M + 1],
                               xest[:M + 1],
                               fill_value='extrapolate')
            interpH = interp1d(yest[M:], xest[M:])

            y1 = np.linspace(yest[m] + eps, yest[M], pts // 2 + 1)
            x1 = interpL(y1)

            y2 = np.flip(y1, 0)
            x2 = interpH(y2)

    else:
        raise ValueError("distribuition must be 'normal' or 'lognormal', "
                         "got %r" % (distribuition,))

    # The peak appears at the end of x1 and the start of x2; keep it once.
    X = np.concatenate([x1[:-1], x2])
    Y = np.concatenate([y1[:-1], y2])

    return X, Y
# Example #6
# 0
def diffArea(nest,
             outlier=0,
             data=0,
             kinds='all',
             axis='probability',
             ROI=20,
             mu=0,
             sigma=1,
             weight=False,
             interpolator='linear',
             distribuition='normal',
             seed=None,
             plot=True):
    """
    Return the error area between an analytic PDF and its interpolation
    through a set of estimation points.

    For every requested discretisation ``kind`` the estimation points are
    generated, the analytic PDF sampled at those points is interpolated
    onto a dense grid, and the absolute difference to the analytic PDF is
    integrated separately over ``ROI`` consecutive regions of the grid.

    Parameters
    ----------
    nest: int
        The number of estimation points.
    outlier: int, optional
        Position of an outlier event, e.g. outlier = 50 puts an event at
        -50 and +50 if mu = 0.
        Default is 0.
    data: int, optional
        If data > 0, random data of that size is used instead of the
        analytic curve.
        Default is 0.
    kinds: str or array, optional
        Specifies the kind(s) of discretisation to analyze
        ('Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2', 'all').
        Default is 'all'.
    axis: str, optional
        Specifies the quantity the regions are ordered by
        ('probability', 'derivative', '2nd_derivative', 'X').
        Default is 'probability'.
    ROI: int, optional
        Specifies the number of regions of interest.
        Default is 20.
    mu: int, optional
        Specifies the mean of the distribution.
        Default is 0.
    sigma: int, optional
        Specifies the standard deviation of the distribution.
        Default is 1.
    weight: bool, optional
        If True, the plotted error of each ROI is multiplied by the number
        of estimation points that fall inside it.
        Default is False.
    interpolator: str, optional
        Specifies the kind of interpolation as a string
        ('linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'
        where 'zero', 'slinear', 'quadratic' and 'cubic' refer to a spline
        interpolation of zeroth, first, second or third order) or as an
        integer specifying the order of the spline interpolator to use.
        Default is 'linear'.
    distribuition: str, optional
        Select the distribution to analyze
        ('normal', 'lognormal').
        Default is 'normal'.
    seed: optional
        A NumPy legacy random state as accepted by ``np.random.set_state``;
        it is also forwarded to PDF, dPDF and ddPDF. ``None`` leaves the
        global random state untouched.
        Default is None.
    plot: bool, optional
        If True, the per-ROI errors are plotted.
        Default is True.

    Returns
    -------
    area: ndarray
        Total error area, one entry per analyzed kind.
    [probROIord, areaROIord]: list of dict
        Per kind, the sorted per-ROI value of the selected ``axis``
        quantity and the per-ROI error areas in the same order.

    Raises
    ------
    ValueError
        If ``distribuition``, ``axis`` or an entry of ``kinds`` is not one
        of the supported values.
    """
    import numpy as np
    from scipy.stats import norm, lognorm
    from scipy.interpolate import interp1d
    from numpy import exp
    import matplotlib.pyplot as plt
    from statsmodels.distributions import ECDF
    from distAnalyze import pdf, dpdf, ddpdf, PDF, dPDF, ddPDF

    if distribuition == 'normal':
        outlier_inf = outlier_sup = outlier
    elif distribuition == 'lognormal':
        outlier_inf = 0
        outlier_sup = outlier
    else:
        raise ValueError("distribuition must be 'normal' or 'lognormal', "
                         "got %r" % (distribuition,))

    truth_by_axis = {
        'probability': pdf,
        'derivative': dpdf,
        '2nd_derivative': ddpdf,
        'X': lambda x, mu, sigma, distribuition: x,
    }
    if axis not in truth_by_axis:
        raise ValueError("axis must be one of %s, got %r" %
                         (sorted(truth_by_axis), axis))
    truth = pdf
    truth1 = truth_by_axis[axis]

    data = int(data)
    eps = 5e-5
    # Largest grid size not above 1e6 that splits evenly into ROI regions
    # (unchanged whenever ROI divides 1e6, e.g. the default 20).
    ngrid = (int(1e6) // ROI) * ROI

    area = []
    probROIord = {}
    areaROIord = {}
    div = {}

    if seed is not None:
        np.random.set_state(seed)
    if data:
        if distribuition == 'normal':
            d = np.random.normal(mu, sigma, data)
        else:
            d = np.random.lognormal(mu, sigma, data)

    if isinstance(kinds, str):
        if kinds == 'all':
            kinds = ['Linspace', 'CDFm', 'PDFm', 'iPDF1', 'iPDF2']
        else:
            kinds = [kinds]

    for kind in kinds:
        if distribuition == 'normal':
            inf, sup = norm.interval(0.9999, loc=mu, scale=sigma)
        else:
            inf, sup = lognorm.interval(0.9999, sigma, loc=0, scale=exp(mu))
            # Lower bound: the quantile whose probability equals the
            # density value at `sup`.
            inf = lognorm.pdf(sup, sigma, loc=0, scale=np.exp(mu))
            inf = lognorm.ppf(inf, sigma, loc=0, scale=np.exp(mu))

        # Dense evaluation grid, one row per region of interest.
        xgrid = np.linspace(inf, sup, ngrid)
        xgridROI = xgrid.reshape([ROI, ngrid // ROI])

        dx = np.diff(xgrid)[0]

        if kind == 'Linspace':
            if data:
                inf, sup = min(d), max(d)
            xest = np.linspace(inf - outlier_inf, sup + outlier_sup, nest)

        elif kind == 'CDFm':
            yest = np.linspace(0 + eps, 1 - eps, nest)
            if not data:
                if distribuition == 'normal':
                    xest = norm.ppf(yest, loc=mu, scale=sigma)
                else:
                    xest = lognorm.ppf(yest, sigma, loc=0, scale=exp(mu))
            else:
                ecdf = ECDF(d)
                inf, sup = min(d), max(d)
                # NOTE(review): the normal branch samples the ECDF at
                # `data` points and the lognormal branch at `nest` points;
                # kept as found - confirm which one is intended.
                if distribuition == 'normal':
                    xest = np.linspace(inf, sup, data)
                else:
                    xest = np.linspace(inf, sup, nest)
                yest = ecdf(xest)
                # Invert the empirical CDF: level -> abscissa.
                interp = interp1d(yest,
                                  xest,
                                  fill_value='extrapolate',
                                  kind='nearest')
                yest = np.linspace(eps, 1 - eps, nest)
                xest = interp(yest)

        elif kind == 'PDFm':
            xest, yest = PDF(nest, mu, sigma, distribuition, outlier, data,
                             seed)
        elif kind == 'iPDF1':
            xest, yest = dPDF(nest, mu, sigma, distribuition, outlier, data,
                              10, seed)
        elif kind == 'iPDF2':
            xest, yest = ddPDF(nest, mu, sigma, distribuition, outlier, data,
                               10, seed)
        else:
            raise ValueError("unknown kind %r" % (kind,))

        # Interpolate the analytic PDF through the estimation points;
        # outside their range the end values are held constant.
        YY = pdf(xest, mu, sigma, distribuition)
        fest = interp1d(xest,
                        YY,
                        kind=interpolator,
                        bounds_error=False,
                        fill_value=(YY[0], YY[-1]))

        yestGrid = []
        ytruthGrid = []
        ytruthGrid2 = []
        divi = []

        for row in xgridROI:
            yestGrid.append([fest(row)])
            ytruthGrid.append([truth(row, mu, sigma, distribuition)])
            ytruthGrid2.append([truth1(row, mu, sigma, distribuition)])
            # Number of estimation points that fall inside this region.
            divi.append(
                int(np.count_nonzero((xest >= row.min())
                                     & (xest < row.max()))))

        diff2 = np.concatenate(
            abs((np.array(yestGrid) - np.array(ytruthGrid)) * dx))
        areaROI = np.sum(diff2, 1)

        divi = np.array(divi)
        divi[divi == 0] = 1

        try:
            probROI = np.mean(np.sum(ytruthGrid2, 1), 1)
        except (ValueError, IndexError):
            # Fallback for inputs with fewer dimensions than expected.
            probROI = np.mean(ytruthGrid2, 1)

        probROIord[kind] = np.sort(probROI)
        index = np.argsort(probROI)

        areaROIord[kind] = areaROI[index]

        area = np.append(area, np.sum(areaROIord[kind]))
        div[kind] = divi[index]
        if plot:
            if weight:
                # pyplot has no `logy`; `semilogy` is the log-y plot call.
                plt.semilogy(probROIord[kind],
                             areaROIord[kind] * div[kind],
                             '-o',
                             label=kind,
                             ms=3)
            else:
                plt.plot(probROIord[kind],
                         areaROIord[kind],
                         '-o',
                         label=kind,
                         ms=3)

            plt.yscale('log')
            plt.xlabel(axis)
            plt.ylabel('Error')
            plt.legend()

    return area, [probROIord, areaROIord]
# Example #7
# 0
    
# NOTE(review): this is a Python 2 script fragment (print statements) and it
# is truncated: the final `if` has no body in the visible source. The names
# `data`, `lognorm`, `SeqIO` and `target_read_length` are defined elsewhere.

# Fit a lognormal to `data` with the location parameter fixed at 0.
sigma, loc, scale = lognorm.fit(data, floc=0)

# print sigma, loc, scale
# print lognorm.mean(sigma, loc=loc, scale=scale)


read_length_count = 0
"""
y_value = lognorm.pdf(data, sigma, loc, scale)
background = np.median(y_value)
"""



# End-points of the central interval holding 50% of the fitted distribution.
end_point = lognorm.interval(0.5, sigma, loc, scale)
print end_point

# calculate the homogeneous distribution as a comparable reference:
# a constant density that puts probability 0.5 on that same interval
background = 0.5/(end_point[1] - end_point[0])
print background


# Index the FASTQ file by record id.
record_dict = SeqIO.index("D:/Data/20161125/filtered_subreads_first1k.fastq", "fastq")
target_seq = []
i= 0
id_list = list(record_dict.keys())
seq_num = len(id_list)
# Loop while the accumulated read length has not exceeded the target.
while read_length_count <= target_read_length:
    print read_length_count
    if i == seq_num:
from scipy.stats import lognorm

# Load the first column of the CSV file as floats.
with open('datafile2.csv') as csvfile2:
    reader = csv.reader(csvfile2)
    data2 = [float(row[0]) for row in reader]

plt.figure()
_ = plt.hist(data2, bins=100)

# Fit a lognormal to the data. floc=0 pins the location parameter so that
# only shape and scale are estimated.
shape1, loc1, scale1 = lognorm.fit(data2, floc=0)
mu1, sigma1 = np.log(scale1), shape1
print("Estimated mu = " + str(mu1))
print("Estimated sigma = " + str(sigma1))

# 0.95 is the confidence level: the returned end-points enclose the central
# 95% of the fitted distribution.
ci1 = lognorm.interval(0.95, s=sigma1, loc=loc1, scale=scale1)
print("Lognorm function CI = " + str(ci1))

# Vertical markers (x fixed, y from 0 to 20) at the lower and upper bounds.
one_x12, one_y12 = [ci1[0]] * 2, [0, 20]
two_x12, two_y12 = [ci1[1]] * 2, [0, 20]

plt.plot(one_x12, one_y12, two_x12, two_y12, marker='o')
plt.title("Lognormal distribution confidence interval")
plt.show()
# Example #9
# 0
def log_normal_distribution(
        radius_g: float, sigma_g: float,
        n_bins: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Sample a log-normal grain size distribution on logarithmically
    spaced radius bins. See Eq. 9 in Ackerman & Marley (2001).

    Parameters
    ----------
    radius_g : float
        Mean geometric radius (um).
    sigma_g : float
        Geometric standard deviation (dimensionless).
    n_bins : int
        Number of logarithmically-spaced radius bins.

    Returns
    -------
    np.ndarray
        Number of grains in each radius bin (PDF at the bin radius times
        the bin width), which sums to approximately 1 grain.
    np.ndarray
        Widths of the radius bins (um). A single NaN when
        ``sigma_g == 1.0``.
    np.ndarray
        Grain radii (um), the arithmetic centres of the bins.
    """

    # With sigma_g = 1 the distribution collapses to a delta function:
    # one grain at radius_g and no meaningful bin width.
    if sigma_g == 1.0:
        return np.array([1.0]), np.array([np.nan]), np.array([radius_g])

    # scipy.stats.lognorm with s = log(sigma_g) and scale = radius_g
    # reproduces the Ackerman & Marley (2001) distribution.
    log_sigma = np.log(sigma_g)

    # Radius range that holds 99.999% of the distribution.
    r_min, r_max = lognorm.interval(1.0 - 1e-5,
                                    log_sigma,
                                    loc=0.0,
                                    scale=radius_g)

    # n_bins bins are delimited by n_bins + 1 boundaries (um).
    edges = np.logspace(np.log10(r_min), np.log10(r_max), n_bins + 1)
    lower, upper = edges[:-1], edges[1:]

    bin_widths = upper - lower
    bin_centres = (upper + lower) / 2.0

    # Number of grains per unit radius at each bin centre.
    density = lognorm.pdf(bin_centres, s=log_sigma, loc=0.0, scale=radius_g)

    return density * bin_widths, bin_widths, bin_centres