Example #1
def test_nc_parameter(self):
    # Parameter values c <= 0 were not enabled (gh-2402).
    # For negative values of c and for c = 0, the rv.cdf(0) results below were nan.
    rv = stats.nct(5, 0)
    assert_equal(rv.cdf(0), 0.5)
    rv = stats.nct(5, -1)
    assert_almost_equal(rv.cdf(0), 0.841344746069, decimal=10)
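Why the expected values above hold (a side note, not part of the test): writing T = (Z + c) / sqrt(V/df) with Z standard normal and V chi-square(df) independent, T <= 0 exactly when Z <= -c, so nct(df, c).cdf(0) equals norm.cdf(-c) for any df. A minimal check:

import numpy as np
from scipy import stats

assert np.isclose(stats.nct(5, 0).cdf(0), stats.norm.cdf(0))   # both 0.5
assert np.isclose(stats.nct(5, -1).cdf(0), stats.norm.cdf(1))  # ~0.841344746069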
Example #2
def test_variance_gh_issue_2401():
    # Computation of the variance of a non-central t-distribution resulted
    # in a TypeError: ufunc 'isinf' not supported for the input types,
    # and the inputs could not be safely coerced to any supported types
    # according to the casting rule 'safe'.
    rv = stats.nct(4, 0)
    assert_equal(rv.var(), 2.0)
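The expected value 2.0 can be cross-checked against the closed-form nct variance (the standard formula, assumed here rather than taken from the test): Var(T) = df(1 + nc^2)/(df - 2) - nc^2 (df/2) [Gamma((df-1)/2) / Gamma(df/2)]^2, which reduces to df/(df - 2) = 2 for df = 4, nc = 0.

import numpy as np
from scipy import stats
from scipy.special import gamma

df, nc = 4, 0.0
closed_form = (df * (1 + nc**2) / (df - 2)
               - nc**2 * (df / 2) * (gamma((df - 1) / 2) / gamma(df / 2))**2)
assert np.isclose(stats.nct(df, nc).var(), closed_form)  # both equal 2.0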
Example #3
import numpy as np
from scipy import stats


def initialize_nct_distribution(df, nc):
    # For extreme df/nc the nct pdf can overflow and return nan; in that case
    # fall back to a shifted central t, which nct approaches for large df.
    with np.errstate(over='ignore', invalid='ignore'):
        test_value = stats.nct.pdf(x=nc, df=df, nc=nc)
    if np.isnan(test_value):
        return stats.t(df=df, loc=nc)
    return stats.nct(df=df, nc=nc)
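A hedged usage sketch (parameter values are arbitrary): for moderate arguments the noncentral t is returned; the shifted central t only appears when the pdf probe overflows to nan.

rv = initialize_nct_distribution(df=10, nc=2.0)
print(rv.mean())  # frozen nct for these well-behaved parameters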
Example #4
import numpy as np
import scipy.stats as scs


def ttest_ind_sample_size(mu1,
                          mu2,
                          s1,
                          s2,
                          r,
                          power,
                          sig_level=0.05,
                          alternative='two-sided',
                          pooled=True):
    """Smallest n1 (with n2 = r * n1) reaching the requested power for a
    two-sample t-test; the alternative is modelled with the noncentral t."""

    n1 = 2  # initialisation
    n2 = n1 * r
    sim_power = 0

    while sim_power < power:

        n1 += 1
        n2 = n1 * r

        if pooled:

            dof = n1 + n2 - 2
            pooled_var = (((s1**2) * (n1 - 1)) + ((s2**2) * (n2 - 1))) / dof
            std_error = np.sqrt(pooled_var * (1 / n1 + 1 / n2))

        else:

            var1 = s1**2  # assuming unbiased sample standard deviations
            var2 = s2**2

            # Welch-Satterthwaite approximation to the degrees of freedom
            dof = (var1 / n1 + var2 / n2)**2
            dof /= (((var1 / n1)**2) / (n1 - 1)) + (((var2 / n2)**2) / (n2 - 1))
            std_error = np.sqrt(var1 / n1 + var2 / n2)

        ncp = (mu2 - mu1) / std_error

        t_null = scs.t(df=dof, loc=0, scale=1)
        t_alt = scs.nct(df=dof, nc=ncp)

        if alternative == 'smaller':
            cv = t_null.ppf(sig_level)
            sim_power = t_alt.cdf(cv)

        elif alternative == 'larger':
            cv = t_null.ppf(1 - sig_level)
            sim_power = 1 - t_alt.cdf(cv)

        elif alternative == 'two-sided':
            cv = [t_null.ppf(sig_level / 2), t_null.ppf(1 - (sig_level / 2))]
            sim_power = sum([t_alt.cdf(cv[0]), 1 - t_alt.cdf(cv[1])])

        else:
            raise ValueError("alternative must be 'smaller', 'larger' or 'two-sided'")

    print('ncp: ', ncp)
    print('Critical t: ', cv)
    print('Actual Power: ', sim_power)

    return (np.ceil(n1), np.ceil(n2))
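A quick illustrative call (numbers are made up): a 5-point mean difference with standard deviations of 10 corresponds to effect size 0.5, which classically needs about 64 subjects per group at 80% power.

n1, n2 = ttest_ind_sample_size(mu1=0, mu2=5, s1=10, s2=10, r=1, power=0.8)
print(n1, n2)  # roughly (64.0, 64.0) for these inputs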
Example #5
import numpy as np
import scipy.stats as sps


class NormNctProduct:
    # The enclosing class is not shown in the source; the name here is assumed.
    def __init__(self, mu, sg, df, nc, lc, sc):
        self.norm = sps.norm(mu, sg)
        self.nct = sps.nct(df=df, nc=nc, loc=lc, scale=sc)

        # The nct mode has no closed form; it lies between these two bounds,
        # so their (loc/scale-adjusted) midpoint serves as an estimate.
        modeL = nc * np.sqrt(df / (df + 5 / 2))
        modeU = nc * np.sqrt(df / (df + 1))
        self.nct.modeEst = sc * (modeL + modeU) / 2 + lc

        self.norm.max = self.norm.pdf(mu)
        self.nct.max = self.nct.pdf(self.nct.modeEst)
        self.max = self.norm.max * self.nct.max
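A minimal usage sketch (values arbitrary; the class name NormNctProduct is the assumption noted above). Since each factor is bounded by its own peak, self.max bounds the product of the two densities at any point, which suggests use as a rejection-sampling envelope constant.

m = NormNctProduct(mu=0.0, sg=1.0, df=10, nc=1.5, lc=0.0, sc=1.0)
print(m.max)  # product of the two peak densities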
Example #6
import numpy as np
import scipy.stats as scs


def ttest_paired_sample_size(starting_n,
                             effect_size,
                             power=0.8,
                             sig_level=0.05,
                             alternative='two-sided'):
    """Smallest n reaching the requested power for a paired t-test; the
    alternative is modelled with the noncentral t."""

    n = starting_n  # initialisation
    sim_power = 0

    while sim_power < power:

        n += 1

        dof = n - 1
        ncp = effect_size * np.sqrt(n)

        t_null = scs.t(df=dof, loc=0, scale=1)
        t_alt = scs.nct(df=dof, nc=ncp)

        if alternative == 'smaller':
            cv = t_null.ppf(sig_level)
            sim_power = t_alt.cdf(cv)

        elif alternative == 'larger':
            cv = t_null.ppf(1 - sig_level)
            sim_power = 1 - t_alt.cdf(cv)

        elif alternative == 'two-sided':
            cv = [t_null.ppf(sig_level / 2), t_null.ppf(1 - (sig_level / 2))]
            sim_power = sum([t_alt.cdf(cv[0]), 1 - t_alt.cdf(cv[1])])

        else:
            raise ValueError("alternative must be 'smaller', 'larger' or 'two-sided'")

    print('ncp: ', ncp)
    print('Critical t: ', cv)
    print('Actual Power: ', sim_power)

    return np.ceil(n)
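Illustrative call: a paired design with effect size 0.5 at the default 80% power and 5% significance is a standard benchmark, needing roughly 34 pairs.

n = ttest_paired_sample_size(starting_n=2, effect_size=0.5)
print(n)  # roughly 34.0 for these inputs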
Example #7
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import nct

fig, ax = plt.subplots(1, 1)

# Calculate the first four moments:

df, nc = 14, 0.24
mean, var, skew, kurt = nct.stats(df, nc, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(nct.ppf(0.01, df, nc), nct.ppf(0.99, df, nc), 100)
ax.plot(x, nct.pdf(x, df, nc), 'r-', lw=5, alpha=0.6, label='nct pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = nct(df, nc)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = nct.ppf([0.001, 0.5, 0.999], df, nc)
np.allclose([0.001, 0.5, 0.999], nct.cdf(vals, df, nc))
# True

# Generate random numbers:

r = nct.rvs(df, nc, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
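As a cross-check not in the scipy docstring example, the sample moments of r should approximate the theoretical mean and var computed above:

print(r.mean(), mean)  # close for 1000 draws
print(r.var(), var)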
Example #8
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss
from scipy import stats
from scipy.stats import burr

c, d = 10.5, 4.3
mean, var, skew, kurt = burr.stats(c, d, moments='mvsk')

x = np.linspace(burr.ppf(0.01, c, d), burr.ppf(0.99, c, d), 100)

print(stats.norm.__doc__)

# b and a are fitted parameter vectors and distribution_car_price is the raw
# price data; none of them is defined in this snippet.
alpha, loc, beta = b[0], b[1], b[2]
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=5000)
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.genlogistic(alpha, loc, beta)
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)

axes = plt.gca()
axes.set_xlim([0, 150000])
plt.show()

alpha, loc, beta = b[0], b[1], b[2]
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=10000)
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.nct(a[0], a[1], a[2], a[3])  # df, nc, loc, scale
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)

axes = plt.gca()
axes.set_xlim([0, 150000])
plt.show()
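The parameter vectors b and a above are undefined in the snippet; a plausible (assumed) way they would be produced is by maximum-likelihood fitting on the same data:

b = ss.genlogistic.fit(distribution_car_price)  # (c, loc, scale)
a = ss.nct.fit(distribution_car_price)          # (df, nc, loc, scale)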
Example #9
import collections as ct
import scipy.stats as st

# Frozen candidate models; all parameters come from earlier .fit() calls.
drivingduration_model_dict = ct.OrderedDict()
drivingduration_model_dict['exponweib'] = st.exponweib(2.6443841639764942,
                                                       0.89242254172118096,
                                                       10.603640861374947,
                                                       40.28556311444698)
drivingduration_model_dict['gengamma'] = st.gengamma(4.8743515108339581,
                                                     0.61806208678747043,
                                                     9.4649293818479716,
                                                     5.431576919220225)
drivingduration_model_dict['recipinvgauss'] = st.recipinvgauss(
    0.499908918842556, 0.78319699707613699, 28.725450197674746)
drivingduration_model_dict['f'] = st.f(9.8757694313677113, 12.347442183821462,
                                       0.051160749890587665,
                                       73.072591767722287)

carprice_model_dict = ct.OrderedDict()
carprice_model_dict['nct'] = st.nct(7.3139456577106312, 3.7415255108348946,
                                    -46.285705145385577, 7917.0860181436065)
carprice_model_dict['genlogistic'] = st.genlogistic(10.736440967148635,
                                                    3735.7049978006107,
                                                    10095.421377235754)
carprice_model_dict['gumbel_r'] = st.gumbel_r(26995.077239517472,
                                              10774.370808211244)
carprice_model_dict['f'] = st.f(24168.523476867485, 35.805656864712923,
                                -21087.314142557225, 51154.0328397044)
carprice_model_dict['johnsonsu'] = st.johnsonsu(-1.7479864366935538,
                                                1.8675670208081987,
                                                14796.793096897647,
                                                14716.575397771712)
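Illustrative use of the candidate dictionaries (carprice_data is an assumed name for the raw prices): score each fitted model with a Kolmogorov-Smirnov test and keep the best.

from scipy import stats as spstats

best = max(carprice_model_dict.items(),
           key=lambda kv: spstats.kstest(carprice_data, kv[1].cdf).pvalue)
print('best car-price model:', best[0])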
Example #10
################DOGECOIN###############
# Assumed from earlier in the script: numpy as np, scipy.stats as stats,
# and the variables euro_log, logreturns, ks and param (the Dogecoin
# norminvgauss fit).
q = np.linspace(0, 1, 100)
e_param = stats.norminvgauss.fit(euro_log)
VaR = stats.norminvgauss(param[0], param[1], param[2], param[3]).ppf(q)
e_var = stats.norminvgauss(e_param[0], e_param[1], e_param[2], e_param[3]).ppf(q)
d, p = stats.kstest(logreturns[2], cdf='norminvgauss', args=(param[0], param[1], param[2], param[3]))

ks.append(p)
#plt.plot(q, VaR, 'r-', label = 'Dogecoin')
#plt.plot(q, e_var, 'b--', label = 'Euro')
#plt.legend()

################LITECOIN###############
param = [1.8693373494312953, -0.08195764686173776, 0.0017432733966738605, 0.021626187586807188]
q = np.linspace(0, 1, 100)
e_param = stats.nct.fit(euro_log)
VaR = stats.nct(param[0], param[1], param[2], param[3]).ppf(q)
e_var = stats.nct(e_param[0], e_param[1], e_param[2], e_param[3]).ppf(q)
d, p = stats.kstest(logreturns[3], cdf='nct', args=(param[0], param[1], param[2], param[3]))

ks.append(p)
#plt.plot(q, VaR, 'r-', label = 'Litecoin')
#plt.plot(q, e_var, 'b--', label = 'Euro')
#plt.legend()

################NEXUS################
param = [2.269163744926829, -0.22915037600364918, 0.014003933425093236, 0.048491139607216696]
q = np.linspace(0, 1, 100)
e_param = stats.nct.fit(euro_log)
VaR = stats.nct(param[0], param[1], param[2], param[3]).ppf(q)
e_var = stats.nct(e_param[0], e_param[1], e_param[2], e_param[3]).ppf(q)
d, p = stats.kstest(logreturns[4], cdf='nct', args=(param[0], param[1], param[2], param[3]))
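The ppf curves computed above are quantile curves, which is exactly what a Value-at-Risk read-out uses; a hedged one-line illustration:

var_95 = stats.nct(param[0], param[1], param[2], param[3]).ppf(0.05)
print('95% one-day VaR of the fitted log-returns:', var_95)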
Example #11
def all_dists():
    # Distribution parameters were taken from the scipy.stats official
    # documentation examples.
    # Total: 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
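Illustrative iteration over the dictionary (not in the original): print each frozen distribution's median via ppf(0.5). Note that levy_stable's ppf can be quite slow.

for name, dist in all_dists().items():
    print('{}: median = {:.4f}'.format(name, dist.ppf(0.5)))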
Example #12
from scipy import stats

dist_continu = [
    d for d in dir(stats) if isinstance(getattr(stats, d), stats.rv_continuous)
]
dist_discrete = [
    d for d in dir(stats) if isinstance(getattr(stats, d), stats.rv_discrete)
]
print('number of continuous distributions:', len(dist_continu))
print('number of discrete distributions:  ', len(dist_discrete))

##Distributions can be used in one of two ways, either by passing all distribution
##parameters to each method call or by freezing the parameters for the instance
##of the distribution. As an example, we can get the median of the distribution by using
##the percent point function, ppf, which is the inverse of the cdf:

print(stats.nct.ppf(0.5, 10, 2.5))
my_nct = stats.nct(10, 2.5)
print(my_nct.ppf(0.5))

##`help(stats.nct)` prints the complete docstring of the distribution. Instead
##we can print just some basic information:

print(stats.nct.extradoc)  # distribution-specific docs (older scipy versions only)

print('number of arguments: %d, shape parameters: %s' % (stats.nct.numargs,
                                                         stats.nct.shapes))
print('bounds of distribution lower: %s, upper: %s' % (stats.nct.a,
                                                       stats.nct.b))

##We can list all methods and properties of the distribution with
##`dir(stats.nct)`. Some of the methods are private methods that are
##not named as such, i.e. no leading underscore, for example veccdf.
Example #13
    def get_H1_statistic_distribution(self):
        effect_size, N = self.effect_size, self.N
        ncp = self.get_ncp()  # effect_size * np.sqrt(N)
        return stats.nct(N - 1, ncp)

    def run(self):
        # Assumed module-level imports: numpy as np, tqdm, defaultdict from
        # collections, nct from scipy.stats, LocalOutlierFactor from
        # sklearn.neighbors.
        self.collector = []
        # main loop: sweep the candidate contamination levels
        for contamination in tqdm.tqdm(self.c_grid):
            samps = int(contamination * self.n_samples)
            if samps < 2:
                continue

            # init running metrics
            running_metrics = defaultdict(list)
            for k in self.k_grid:
                clf = LocalOutlierFactor(n_neighbors=k,
                                         contamination=contamination)
                clf.fit_predict(self.data)
                X_scores = np.log(-clf.negative_outlier_factor_)
                t0 = X_scores.argsort()  #[::-1]
                top_k = t0[-samps:]
                min_k = t0[:samps]

                x_out = X_scores[top_k]
                x_in = X_scores[min_k]

                mc_out = np.mean(x_out)
                mc_in = np.mean(x_in)
                vc_out = np.var(x_out)
                vc_in = np.var(x_in)
                Tck = (mc_out - mc_in) / np.sqrt(
                    (self.eps + ((1 / samps) * (vc_out + vc_in))))

                running_metrics['tck'].append(Tck)
                running_metrics['mck_out'].append(mc_out)
                running_metrics['mck_in'].append(mc_in)
                running_metrics['vck_in'].append(vc_in)
                running_metrics['vck_out'].append(vc_out)

            # index of the k with the largest separation statistic Tck
            largest_idx = np.array(running_metrics['tck']).argsort()[-1]
            mean_mc_out = np.mean(running_metrics['mck_out'])
            mean_mc_in = np.mean(running_metrics['mck_in'])
            mean_vc_out = np.mean(running_metrics['vck_out'])
            mean_vc_in = np.mean(running_metrics['vck_in'])

            # ncpc: non-centrality parameter
            ncpc = (mean_mc_out - mean_mc_in) / np.sqrt(
                (self.eps + ((1 / samps) * (mean_vc_out + mean_vc_in))))
            # dfc: degrees of freedom
            dfc = (2 * samps) - 2

            if dfc <= 0:
                continue

            Z = nct(dfc, ncpc)  # non-central t-distribution
            Kopt = self.k_grid[largest_idx]
            Topt = running_metrics['tck'][largest_idx]
            Z = Z.cdf(Topt)
            self.collector.append([Kopt, Topt, Z, contamination])

        # keep the (k, contamination) pair whose statistic attains the
        # largest noncentral-t CDF value
        max_cdf = 0.
        self.tuned_params = {}
        for Kopt, Topt, Z, contamination in self.collector:
            if Z > max_cdf:
                max_cdf = Z

            if max_cdf == Z:
                self.tuned_params['k'] = Kopt
                self.tuned_params['c'] = contamination

        print("\nTuned LOF Parameters : {}".format(self.tuned_params))
        return
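A standalone sketch of the significance step above (a hedged reading of the method, with toy data): with samps scores in each of the outlier and inlier groups, the separation statistic is referred to a noncentral t with 2*samps - 2 degrees of freedom, and its CDF value acts as a confidence-like score.

import numpy as np
from scipy.stats import nct

rng = np.random.default_rng(0)
x_out = rng.normal(2.0, 1.0, size=30)  # toy "outlier" LOF scores
x_in = rng.normal(0.0, 1.0, size=30)   # toy "inlier" LOF scores

samps = 30
ncpc = (x_out.mean() - x_in.mean()) / np.sqrt((x_out.var() + x_in.var()) / samps)
Topt = 1.2 * ncpc                       # stand-in for the best observed Tck
Z = nct(2 * samps - 2, ncpc).cdf(Topt)  # confidence-like score in (0, 1)
print(Z)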