Beispiel #1
0
def plot_johnson_sb_fit(data,
                        fit_results,
                        title=None,
                        x_label=None,
                        x_range=None,
                        y_range=None,
                        fig_size=(6, 5),
                        bin_width=1,
                        filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "a", "b", "loc", "scale", and "AIC"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range
        (the histogram shows the probability density so the upper value of y_range should be 1).
    :param fig_size: int, specify the figure size
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """

    plot_fit_continuous(data=data,
                        dist=stat.johnsonsb(a=fit_results['a'],
                                            b=fit_results['b'],
                                            loc=fit_results['loc'],
                                            scale=fit_results['scale']),
                        label='Johnson Sb',
                        bin_width=bin_width,
                        title=title,
                        x_label=x_label,
                        x_range=x_range,
                        y_range=y_range,
                        fig_size=fig_size,
                        filename=filename)
Beispiel #2
0
def getfracvalsfromfit(histfit, thefracs, numbins=2000, displayplots=True):
    """

    Parameters
    ----------
    histfit
    thefracs
    numbins
    displayplots

    Returns
    -------

    """
    # print('entering getfracvalsfromfit: histfit=',histfit, ' thefracs=', thefracs)
    thedist = johnsonsb(histfit[0], histfit[1], histfit[2], histfit[3])
    # print('froze the distribution')
    if displayplots:
        themin = 0.001
        themax = 0.999
        bins = np.arange(themin, themax, (themax - themin) / numbins)
        fig = pl.figure()
        ax = fig.add_subplot(111)
        ax.set_title('probability histogram')
        pl.plot(bins, johnsonsb.ppf(thefracs, histfit[0], histfit[1], histfit[2], histfit[3]))
        pl.show()
    # thevals = johnsonsb.ppf(thefracs, histfit[0], histfit[1], histfit[2], histfit[3])
    thevals = thedist.ppf(thefracs)
    return thevals
Beispiel #3
0
def getjohnsonppf(percentile, params, zeroterm):
    """

    Parameters
    ----------
    percentile
    params
    zeroterm

    Returns
    -------

    """
    johnsonfunc = johnsonsb(params[0], params[1], params[2], params[3])
    corrfac = 1.0 - zeroterm
Beispiel #4
0
def getjohnsonppf(percentile, params, zeroterm):
    """

    Parameters
    ----------
    percentile
    params
    zeroterm

    Returns
    -------

    """
    johnsonfunc = johnsonsb(params[0], params[1], params[2], params[3])
    corrfac = 1.0 - zeroterm
Beispiel #5
0
def getfracvalsfromfit(histfit, thefracs):
    """

    Parameters
    ----------
    histfit
    thefracs
    displayplots

    Returns
    -------

    """
    # print('entering getfracvalsfromfit: histfit=',histfit, ' thefracs=', thefracs)
    thedist = johnsonsb(histfit[0], histfit[1], histfit[2], histfit[3])
    thevals = thedist.ppf(thefracs)
    return thevals
def fit_johnsonSb(data, x_label, fixed_location=0, figure_size=5):
    """
    :param data: (numpy.array) observations
    :param x_label: label to show on the x-axis of the histogram
    :param figure_size: int, specify the figure size
    :returns: dictionary with keys "a", "b", "loc", "scale", and "AIC"
    """

    # plot histogram
    fig, ax = plt.subplots(1, 1, figsize=(figure_size + 1, figure_size))
    ax.hist(data,
            normed=1,
            bins='auto',
            edgecolor='black',
            alpha=0.5,
            label='Frequency')

    # estimate the parameters
    a, b, loc, scale = scs.johnsonsb.fit(data, floc=fixed_location)

    # plot the estimated JohnsonSb distribution
    x_values = np.linspace(scs.johnsonsb.ppf(0.01, a, b, loc, scale),
                           scs.johnsonsb.ppf(0.99, a, b, loc, scale), 100)
    rv = scs.johnsonsb(a, b, loc, scale)
    ax.plot_all(x_values,
                rv.pdf(x_values),
                color=COLOR_CONTINUOUS_FIT,
                lw=2,
                label='JohnsonSb')

    ax.set_xlabel(x_label)
    ax.set_ylabel("Frequency")
    ax.legend()
    plt.show()

    # calculate AIC
    aic = AIC(k=3,
              log_likelihood=np.sum(
                  scs.johnsonsb.logpdf(data, a, b, loc, scale)))

    # report results in the form of a dictionary
    return {"a": a, "b": b, "loc": loc, "scale": scale, "AIC": aic}
Beispiel #7
0
def fit_johnson_per_label(X, labels):
    '''
    Fits a Johnson Distribution (bounded) to a univariate variable X
    for each class label in labels

    Parameters
    ----------
    X : ndarray, N x T, univariate numeric.
    labels : ndarray, integer class labels.

    Returns
    -------
    distributions : list of rv_continous objects.
        List of fitted Johnson distributions, indexed by label.
    '''

    distributions = []
    for l in np.unique(labels):
        X_l = X[labels == l]
        X_lr = X_l.reshape((X_l.shape[0] * X_l.shape[1]), 1)
        a, b, loc, scale = stats.johnsonsb.fit(X_lr)
        d = stats.johnsonsb(a, b, loc=loc, scale=scale)
        distributions.append(d)
    return distributions
Beispiel #8
0
def all_dists():
    # dists param were taken from scipy.stats official
    # documentaion examples
    # Total - 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
x = np.linspace(johnsonsb.ppf(0.01, a, b), johnsonsb.ppf(0.99, a, b), 100)
ax.plot(x,
        johnsonsb.pdf(x, a, b),
        'r-',
        lw=5,
        alpha=0.6,
        label='johnsonsb pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = johnsonsb(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = johnsonsb.ppf([0.001, 0.5, 0.999], a, b)
np.allclose([0.001, 0.5, 0.999], johnsonsb.cdf(vals, a, b))
# True

# Generate random numbers:

r = johnsonsb.rvs(a, b, size=1000)

# And compare the histogram:

ax.hist(r, normed=True, histtype='stepfilled', alpha=0.2)