def HybridNormalGPDPDF(xs, u, mu, sigma, shape, loc, scale):
    '''
    Params: 
        xs: unsorted list of data to fit the semi-parametric PDF to.
        u: threshold at which to switch from the central Gaussian PDF fit to GPD tail fitting.
        mu:  mean of the data.
        sigma: standard deviation of the data.
        shape: gpd least squares estimated shape parameter.
        loc: gpd least squares estimated location parameter.
        scale: gpd least squares estimated scale parameter.
    Returns:
        an array that would result from xs.apply(semiparametric_fittedfunction) or F_n(xs) where F_n is the PDF fit.
    '''
    out = list()
    l = (mu - abs(u - mu))
    h = (mu + abs(u - mu))
    #print('u = %.10f,l = %.10f,h = %.10f'%(u,l,h))
    for x in xs:
        if x < l:
            out.append(
                norm.cdf(l, mu, sigma) *
                genpareto.pdf(l - x, shape, loc=loc, scale=scale))
        elif x >= h:
            out.append((1 - norm.cdf(h, mu, sigma)) *
                       genpareto.pdf(x - h, shape, loc=loc, scale=scale))
        else:
            out.append(norm.pdf(x, mu, sigma))
    return out
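A minimal usage sketch for the hybrid fit above; the GPD parameters below are illustrative placeholders, not estimates, and the imports mirror what the snippet assumes.
import numpy as np
from scipy.stats.distributions import norm, genpareto

data = np.random.normal(loc=0.0, scale=1.0, size=1000)
mu, sigma = data.mean(), data.std(ddof=1)
u = mu + 2.0 * sigma                   # illustrative tail threshold
shape, loc, scale = 0.1, 0.0, 0.5      # placeholder GPD parameters
density = HybridNormalGPDPDF(data, u, mu, sigma, shape, loc, scale)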
    def loss_function(abg):
#            #SIMPLE: Penalize negative a's, we want a positive, b/c for a<0, the algorithm is different:
#            if min(abg)<.0 or max(abg) > 5.:
#                return 1e6#
            error = 0.
            for (phi_m, phi_idx) in zip(phis, range(N_phi)):
                Is = bins[phi_m]['Is']
                uniqueIs = bins[phi_m]['unique_Is']
                 
                a,b,g = abg[0], abg[1], abg[2]
                movingThreshold = getMovingThreshold(a,g, phi_m)
                
                LHS_numerator = movingThreshold(uniqueIs[1:]) *sqrt(2.)
                LHS_denominator = b * sqrt(1 - exp(-2*uniqueIs[1:]))
                LHS = 1 -  norm.cdf(LHS_numerator / LHS_denominator)
                
                RHS = zeros_like(LHS)
                N  = len(Is)
                for rhs_idx in range(1, len(uniqueIs)):
                    t = uniqueIs[rhs_idx]
                    lIs = Is[Is<t]
                    taus = t - lIs;
                    
                    numerator = (movingThreshold(t) - movingThreshold(lIs)* exp(-taus)) * sqrt(2.)
                    denominator = b *  sqrt(1. - exp(-2*taus))
                    RHS[rhs_idx-1] = sum(1. - norm.cdf(numerator/denominator)) / N
                
#                error += sum(abs(LHS - RHS));
                error += sum((LHS - RHS)**2);
#                error += max(abs(LHS - RHS))

            return error
        def fun(params):
            """
            Negative log-likelihood of z-scores.

            The function has three arguments, packed into a vector:

            mean : location parameter
            logscale : log of the scale parameter
            logitprop : logit of the proportion of true nulls

            The implementation follows section 4 from Efron 2008.
            """

            d, s, p = xform(params)

            # Mass within the central region
            central_mass = (norm.cdf((null_ub - d) / s) -
                            norm.cdf((null_lb - d) / s))

            # Probability that a Z-score is null and is in the central region
            cp = p * central_mass

            # Binomial term
            rval = n_zs0 * np.log(cp) + (n_zs - n_zs0) * np.log(1 - cp)

            # Truncated Gaussian term for null Z-scores
            zv = (zscores0 - d) / s
            rval += np.sum(-zv**2 / 2) - n_zs0 * np.log(s)
            rval -= n_zs0 * np.log(central_mass)

            return -rval
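The closure above depends on an external xform helper that unpacks the parameter vector. The sketch below is only one plausible definition consistent with the docstring (mean, log of scale, logit of the null proportion), not the library's actual implementation.
import numpy as np
from scipy.special import expit   # logistic sigmoid

def xform(params):
    # assumption: params = (mean, log(scale), logit(proportion of true nulls))
    mean, logscale, logitprop = params
    return mean, np.exp(logscale), expit(logitprop)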
Example #4
def plotlmmse():
    nsample = 10**5
    snrlst = range(0, 19)

    plt.subplot(211)
    pe = []
    for snr in snrlst:
        coeff = lmmse_coeff(tap1, 41, snr)
        delta_square = 10**(-snr / 10.) * sum(coeff**2)
        pe.append(1 - norm.cdf(sqrt((1 - 0.7)**2 / delta_square)))
    plt.semilogy(snrlst, pe, snrlst, equalizer(2, 41, snrlst, nsample,
                                               'lmmse'), "-.")
    plt.legend(("Theoretical curve", "Simulated curve"), loc='lower left')
    plt.title("Theoretical vs. Simulated performances for Channel 1")
    plt.xlabel("SNR (dB)")
    plt.ylabel("SER (dB)")
    plt.grid(True, which='both')

    plt.subplot(212)
    pe = []
    for snr in snrlst:
        coeff = lmmse_coeff(tap2, 41, snr)
        delta_square = 10**(-snr / 10.) * sum(coeff**2)
        pe.append(1 - norm.cdf(sqrt((1 - 0.41)**2 / delta_square)))
    plt.semilogy(snrlst, pe, snrlst, equalizer(2, 41, snrlst, nsample,
                                               'lmmse'), "-.")
    plt.legend(("Theoretical curve", "Simulated curve"), loc='lower left')
    plt.title("Theoretical vs. Simulated performances for Channel 2")
    plt.xlabel("SNR (dB)")
    plt.ylabel("SER (dB)")
    plt.grid(True, which='both')
    plt.show()
        def fun(params):
            """
            Negative log-likelihood of z-scores.

            The function has three arguments, packed into a vector:

            mean : location parameter
            logscale : log of the scale parameter
            logitprop : logit of the proportion of true nulls

            The implementation follows section 4 from Efron 2008.
            """

            d, s, p = xform(params)

            # Mass within the central region
            central_mass = (norm.cdf((null_ub - d) / s) - norm.cdf(
                (null_lb - d) / s))

            # Probability that a Z-score is null and is in the central region
            cp = p * central_mass

            # Binomial term
            rval = n_zs0 * np.log(cp) + (n_zs - n_zs0) * np.log(1 - cp)

            # Truncated Gaussian term for null Z-scores
            zv = (zscores0 - d) / s
            rval += np.sum(-zv**2 / 2) - n_zs0 * np.log(s)
            rval -= n_zs0 * np.log(central_mass)

            return -rval
def DemoivreLaplaceApprox(k,l,n,p): #return [np, binP], not accurate
	np=float(n)*p;
	base=sqrt(np*(1-p));
	lf=float(l);
	kf=float(k);

	return [np,norm.cdf((lf+0.5-np)/base)-norm.cdf((kf-0.5-np)/base)];
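A quick sanity check of the approximation against the exact binomial probability, using scipy.stats.binom.
from scipy.stats import binom

approx_mean, approx_prob = DemoivreLaplaceApprox(40, 60, 100, 0.5)
exact_prob = binom.cdf(60, 100, 0.5) - binom.cdf(39, 100, 0.5)
print(approx_prob, exact_prob)        # the two probabilities should be close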
Example #7
def BlackScholes(reTime, rf, S, K, sigma):
	d1=(log(S/K)+(rf+sigma**2/2)*reTime)/(sigma*sqrt(reTime))
	d2=d1-sigma*sqrt(reTime)
	call_BS = (S*norm.cdf(d1,0,1)-K*exp(-rf*reTime)*norm.cdf(d2,0,1))
	put_BS = K*exp(-rf*reTime)*norm.cdf(-d2,0,1)-S*norm.cdf(-d1,0,1)
	delta=norm.cdf(d1,0,1)
	gamma=norm.pdf(d1,0,1)/(S*sigma*sqrt(reTime))
	vega=S*norm.pdf(d1)*np.sqrt(reTime)
	theta=-.5*S*norm.pdf(d1)*sigma/np.sqrt(reTime)
	return {'call_BS':call_BS,'put_BS':put_BS,'delta':delta,'gamma':gamma,'vega':vega,'theta':theta}
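Illustrative call (assumes the snippet's log/exp/sqrt/np/norm imports): price and Greeks for a one-year at-the-money option with 20% volatility and a 5% rate.
greeks = BlackScholes(reTime=1.0, rf=0.05, S=100.0, K=100.0, sigma=0.2)
print(greeks['call_BS'], greeks['put_BS'], greeks['delta'])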
Example #8
def QQPlot(DataValues, Alpha_CI=0.95, DataLabel='Data', FigFile='QQPlot.png'):

    ### Based on: https://www.tjmahr.com/quantile-quantile-plots-from-scratch/
    ### Itself based on Fox book: Fox, J. (2015)
    ### Applied Regression Analysis and Generalized Linear Models.
    ### Sage Publications, Thousand Oaks, California.

    # Data analysis
    N = len(DataValues)
    X_Bar = np.mean(DataValues)
    S_X = np.std(DataValues,ddof=1)

    # Sort data to get the rank
    Data_Sorted = np.zeros(N)
    Data_Sorted += DataValues
    Data_Sorted.sort()

    # Compute quantiles
    EmpiricalQuantiles = np.arange(0.5, N + 0.5) / N
    TheoreticalQuantiles = norm.ppf(EmpiricalQuantiles, X_Bar, S_X)
    ZQuantiles = norm.ppf(EmpiricalQuantiles,0,1)

    # Compute data variance
    DataIQR = np.quantile(DataValues, 0.75) - np.quantile(DataValues, 0.25)
    NormalIQR = np.sum(np.abs(norm.ppf(np.array([0.25, 0.75]), 0, 1)))
    Variance = DataIQR / NormalIQR
    Z_Space = np.linspace(min(ZQuantiles), max(ZQuantiles), 100)
    Variance_Line = Z_Space * Variance + np.median(DataValues)

    # Compute alpha confidence interval (CI)
    Z_SE = np.sqrt(norm.cdf(Z_Space) * (1 - norm.cdf(Z_Space)) / N) / norm.pdf(Z_Space)
    Data_SE = Z_SE * Variance
    Z_CI_Quantile = norm.ppf(np.array([(1 - Alpha_CI) / 2]), 0, 1)

    # Create point in the data space
    Data_Space = np.linspace(min(TheoreticalQuantiles), max(TheoreticalQuantiles), 100)

    # QQPlot
    BorderSpace = max( 0.05*abs(Data_Sorted.min()), 0.05*abs(Data_Sorted.max()))
    Y_Min = Data_Sorted.min() - BorderSpace
    Y_Max = Data_Sorted.max() + BorderSpace
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Axes.plot(TheoreticalQuantiles, Data_Sorted, linestyle='none', marker='o', mew=0.5, fillstyle='none', color=(0, 0, 0), label=DataLabel)
    Axes.plot(Data_Space, Variance_Line, linestyle='--', color=(1, 0, 0), label='Variance :' + str(format(np.round(Variance, 2),'.2f')))
    Axes.plot(Data_Space, Variance_Line + Z_CI_Quantile * Data_SE, linestyle='--', color=(0, 0, 1), label=str(int(100*Alpha_CI)) + '% CI')
    Axes.plot(Data_Space, Variance_Line - Z_CI_Quantile * Data_SE, linestyle='--', color=(0, 0, 1))
    plt.xlabel('Theoretical quantiles (-)')
    plt.ylabel('Empirical quantiles (-)')
    plt.ylim([Y_Min, Y_Max])
    plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15), prop={'size':10})
    plt.savefig(FigFile)
    plt.show()
    plt.close(Figure)

    return Variance
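A hedged usage sketch, assuming the snippet's own numpy / scipy.stats / matplotlib imports are in scope; the sample is simulated normal data.
import numpy as np

Values = np.random.normal(loc=10.0, scale=2.0, size=50)
Var = QQPlot(Values, Alpha_CI=0.95, DataLabel='Simulated', FigFile='QQPlot.png')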
    def testSolveIWithDispersionMatchesMorrisk0Disp(self):
        """The solveIWithDispersionDimensional method matches results from
        Morris et al 2015.
        """
        time = np.linspace(0, 7, int(1.4e5)) #1000 pts per second
        time_step = time[1] - time[0]
        num_time_pts = len(time)
        dE = 0
        freq = 0
        Ru = 0
        Cdl = 0
        Cdl1 = 0
        Cdl2 = 0
        Cdl3 = 0
        EStart = -0.2
        ERev = 0.5
        temp = 293
        nu = 0.1

        area = 1
        coverage = 1e-11
        k_0BinsUnscaled = np.linspace(-7, 7, 15)
        #Define wE in terms of bin widths
        leftBinEnds = np.empty(15)
        leftBinEnds[1:] = np.linspace(-6.5, 6.5, 14)
        leftBinEnds[0] = -np.inf
        rightBinEnds = np.empty(15)
        rightBinEnds[:-1] = np.linspace(-6.5, 6.5, 14)
        rightBinEnds[-1] = np.inf
        wK = norm.cdf(rightBinEnds, loc=0, scale=2) -\
        norm.cdf(leftBinEnds, loc=0, scale=2)

        expWidth = np.array([0.124, 0.128, 0.134, 0.141, 0.150, 0.161,
                             0.172, 0.185, 0.198, 0.211])

        self.assertEqual(np.sum(wK), 1)

        for m, ew in zip(range(1, 11), expWidth):
            k_0Vals = 0.1 * 2**(0.1*m*k_0BinsUnscaled)
            bins = [(0, k_0, w) for k_0, w in zip(k_0Vals, wK)]
            I, amt = st.solve_reaction_disp_dim_bins(time_step,
                        num_time_pts, dE, freq, Ru, Cdl, Cdl1, Cdl2, Cdl3,
                        EStart, ERev, temp, nu, area, coverage, bins)

            width = st.half_maximum_width(I, time, nu)
            err = abs(width - ew)
            #Rounding error + 2*step size + error in I
            #(estimated at 1*stepsize)
            self.assertLess(err, 7e-4)
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis), mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1 ** 2 + std_2 ** 2)
    return 1 - norm.cdf(W)
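This helper mirrors the public scipy.stats.mstats.compare_medians_ms; a minimal call looks like the following.
from scipy.stats import mstats

p = mstats.compare_medians_ms([1, 2, 3, 4, 5, 6, 7],
                              [8, 9, 10, 11, 12, 13, 14])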
Example #11
def main():
    gamma = np.arange(2, 6.01, 0.01)
    delta = np.arange(4, 12.01, 0.01)
    c = np.zeros((len(gamma), len(delta)))

    # Calculate the expectation for coverage area
    for i in range(len(gamma)):
        for j in range(len(delta)):
            b = 10 * gamma[i] * log10(e) / delta[j]
            c[i, j] = 0.5 + exp(1 / b**2) * norm.cdf(-2 / b)
    print(np.max(c), np.min(c))

    # Plotting
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    X, Y = np.meshgrid(delta, gamma)
    surf = ax.plot_surface(X,
                           Y,
                           c,
                           rstride=5,
                           cstride=5,
                           cmap=cm.jet,
                           linewidth=1,
                           antialiased=True)
    ax.set_zlim3d(np.min(c), np.max(c))
    ax.set_xlabel('delta')
    ax.set_ylabel('gamma')
    ax.set_zlabel('coverage')
    fig.colorbar(surf)
    plt.show()
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1 ** 2 + std_2 ** 2)
    return 1 - norm.cdf(W)
def generate_ordinal():

    ## Regression coefficients
    beta = np.zeros(5, dtype=np.float64)
    beta[2] = 1
    beta[4] = -1

    rz = 0.5

    OUT = open("gee_ordinal_1.csv", "w")

    for i in range(200):

        n = np.random.randint(3, 6)  # Cluster size

        x = np.random.normal(size=(n, 5))
        for j in range(5):
            x[:, j] += np.random.normal()
        pr = np.dot(x, beta)
        pr = np.array([1, 0, -0.5]) + pr[:, None]
        pr = 1 / (1 + np.exp(-pr))

        z = rz*np.random.normal() +\
            np.sqrt(1-rz**2)*np.random.normal(size=n)
        u = norm.cdf(z)

        y = (u[:, None] > pr).sum(1)

        for j in range(n):
            OUT.write("%d,%d," % (i, y[j]))
            OUT.write(",".join(["%.3f" % b for b in x[j, :]]) + "\n")

    OUT.close()
Example #14
def weightedUtest(g1, w1, g2, w2):
    """ Determines the confidence level of the assertion:
    'The values of g2 are higher than those of g1'.  
    (adapted from the scipy.stats version)
    
    Twist: here the elements of each group have associated weights, 
    corresponding to how often they are present (i.e. two identical entries with 
    weight w are equivalent to one entry with weight 2w).
    Reference: "Studies in Continuous Black-box Optimization", Schaul, 2011 [appendix B].
    
    TODO: make more efficient for large sets. 
    """
    from scipy.stats.distributions import norm
    import numpy
    n1 = sum(w1)
    n2 = sum(w2)
    u1 = 0.
    for x1, wx1 in zip(g1, w1):
        for x2, wx2 in zip(g2, w2):
            if x1 == x2:
                u1 += 0.5 * wx1 * wx2
            elif x1 > x2:
                u1 += wx1 * wx2
    mu = n1 * n2 / 2.
    sigu = numpy.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.)
    z = (u1 - mu) / sigu
    conf = norm.cdf(z)
    return conf
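An illustrative call with small weighted groups; values and weights are chosen arbitrarily.
g1, w1 = [0.10, 0.20, 0.30], [1.0, 2.0, 1.0]
g2, w2 = [0.25, 0.40, 0.50], [1.0, 1.0, 2.0]
confidence = weightedUtest(g1, w1, g2, w2)   # confidence level for the assertion in the docstring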
Example #15
def Z_test(x1, x2, Alpha=0.95):
    ResultsTable = pd.DataFrame()

    # Compute standard deviation and number of observation
    S_x1 = x1.std(ddof=1)
    S_x2 = x2.std(ddof=1)
    N_x1 = len(x1)
    N_x2 = len(x2)

    # Test statistic and p value
    Z = (x1.mean() - x2.mean()) / np.sqrt(S_x1**2 / N_x1 + S_x2**2 / N_x2)
    p = 2 * (1 - norm.cdf(abs(Z)))

    # Rejection range
    MinValue = norm.ppf((1 - Alpha) / 2)
    MaxValue = norm.ppf(1 - (1 - Alpha) / 2)
    RejectionRange = np.array([[-np.inf, round(MinValue, 3)],
                               [round(MaxValue, 3), np.inf]])

    Results = {
        'Test statistic': round(Z, 3),
        'p value': round(p, 9),
        'Significance level (%)': Alpha * 100,
        'Rejection range': RejectionRange
    }

    ResultsTable = pd.concat([ResultsTable, pd.DataFrame([Results])], ignore_index=True)

    return ResultsTable
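Illustrative call on two simulated samples, passed as pandas Series so that .mean()/.std(ddof=1) behave as in the snippet.
import numpy as np
import pandas as pd

x1 = pd.Series(np.random.normal(0.0, 1.0, size=100))
x2 = pd.Series(np.random.normal(0.5, 1.0, size=100))
print(Z_test(x1, x2, Alpha=0.95))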
Example #16
def compare_medians_ms(group_1, group_2, axis=None):
    """Compares the medians from two independent groups along the given axis.

The comparison is performed using the McKean-Schrader estimate of the standard
error of the medians.

Parameters
----------
    group_1 : {sequence}
        First dataset.
    group_2 : {sequence}
        Second dataset.
    axis : {integer}
        Axis along which the medians are estimated. If None, the arrays are flattened.

Returns
-------
    A (p,) array of comparison values.

    """
    (med_1, med_2) = (ma.median(group_1,
                                axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
Example #17
def skewtest(a,axis=-1):
    """Tests whether the skew is significantly different from a normal distribution.

    Axis can equal None (ravel array first), an integer (the axis over
    which to operate), or a sequence (operate over multiple axes).

    NOTE: This function is mostly copied from scipy.stats.stats, but
    corrects for a major bug: the pvalue returned by SciPy is not valid
    when the skewness is negative! The return values also are slightly
    different: the skew is actually returned while the z-score is not.

    Returns: skewness and 2-tail z-probability
    """
    a, axis = _chk_asarray(a, axis)
    if axis is None:
        a = ravel(a)
        axis = 0
    skewness = skew(a,axis)
    n = float(a.shape[axis])
    if n<8:
        print "skewtest only valid for n>=8 ... continuing anyway, n=",n
    y = skewness * sqrt(((n+1)*(n+3)) / (6.0*(n-2)) )
    beta2 = ( 3.0*(n*n+27*n-70)*(n+1)*(n+3) ) / ( (n-2.0)*(n+5)*(n+7)*(n+9) )
    W2 = -1 + sqrt(2*(beta2-1))
    delta = 1/sqrt(log(sqrt(W2)))
    alpha = sqrt(2.0/(W2-1))
    y = where(equal(y,0),1,y)
    Z = delta*log(y/alpha + sqrt((y/alpha)**2+1))
    
    # The two-tailed p-value is twice the prob that value of a std normal r.v.
    # turns out to be greater than the (absolute) value of Z
    pvalue = 2*( 1 - norm.cdf(abs(Z)) )
    assert pvalue >= 0.0 and pvalue <= 1.0
    return skewness, pvalue
def MannWhitneyUTest(x,y):

    Nx = len(x)
    Ny = len(y)

    XData = pd.DataFrame({'Values': x, 'Group': 'Control'}, index=range(len(x)))
    YData = pd.DataFrame({'Values': y, 'Group': 'Test'}, index=range(len(y)))

    Pool = pd.concat([XData, YData], ignore_index=True)
    Pool['Ranks'] = Pool['Values'].rank(method='average')

    R1 = Pool[Pool['Group']=='Control']['Ranks'].sum()
    U1 = R1 - (Nx * (Nx+1)) / 2
    U2 = Nx * Ny - U1

    U = max(U1, U2)

    UMean = Nx * Ny / 2
    UStd  = np.sqrt((Nx * Ny * (Nx + Ny + 1)) / 12)

    # Transform into the z space
    from scipy.stats.distributions import norm
    z = (U - UMean) / UStd
    p = 2 * (1 - norm.cdf(abs(z)))

    return U, p
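Illustrative call, assuming the snippet's own pandas import; scipy.stats.mannwhitneyu can serve as a cross-check (its tie and continuity handling differ slightly).
import numpy as np

x = list(np.random.normal(0.0, 1.0, size=30))
y = list(np.random.normal(0.8, 1.0, size=30))
U, p = MannWhitneyUTest(x, y)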
Example #19
def compare_medians_ms(group_1, group_2, axis=None):
    """Compares the medians from two independent groups along the given axis.

The comparison is performed using the McKean-Schrader estimate of the standard
error of the medians.

Parameters
----------
    group_1 : {sequence}
        First dataset.
    group_2 : {sequence}
        Second dataset.
    axis : {integer}
        Axis along which the medians are estimated. If None, the arrays are flattened.

Returns
-------
    A (p,) array of comparison values.

    """
    (med_1, med_2) = (ma.median(group_1,axis=axis), ma.median(group_2,axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
Example #20
def skewtest(a, axis=-1):
    """Tests whether the skew is significantly different from a normal distribution.

    Axis can equal None (ravel array first), an integer (the axis over
    which to operate), or a sequence (operate over multiple axes).

    NOTE: This function is mostly copied from scipy.stats.stats, but
    corrects for a major bug: the pvalue returned by SciPy is not valid
    when the skewness is negative! The return values also are slightly
    different: the skew is actually returned while the z-score is not.

    Returns: skewness and 2-tail z-probability
    """
    a, axis = _chk_asarray(a, axis)
    if axis is None:
        a = ravel(a)
        axis = 0
    skewness = skew(a, axis)
    n = float(a.shape[axis])
    if n < 8:
        print "skewtest only valid for n>=8 ... continuing anyway, n=", n
    y = skewness * sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2)))
    beta2 = (3.0 * (n * n + 27 * n - 70) * (n + 1) *
             (n + 3)) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9))
    W2 = -1 + sqrt(2 * (beta2 - 1))
    delta = 1 / sqrt(log(sqrt(W2)))
    alpha = sqrt(2.0 / (W2 - 1))
    y = where(equal(y, 0), 1, y)
    Z = delta * log(y / alpha + sqrt((y / alpha)**2 + 1))

    # The two-tailed p-value is twice the prob that value of a std normal r.v.
    # turns out to be greater than the (absolute) value of Z
    pvalue = 2 * (1 - norm.cdf(abs(Z)))
    assert pvalue >= 0.0 and pvalue <= 1.0
    return skewness, pvalue
Example #21
def copula(num_samples, rho_mat, mu_mat, methods):
    """Copula procedure to generate an OTU table with corrs close to rho_mat.
    Inputs:
     num_samples - int, number of samples. 
     rho_mat - 2d arr, symmetric positive definite matrix which specifies the 
     correlation or covariation between the otu's in the table. 
     mu_mat - 1d arr w/ len(num_otus), mean of otu for multivariate random call.
     methods - list of lists w/ len(num_otus), each list has a variable number 
     of elements. the first element in each list is the 
     scipy.stats.distributions function like lognorm or beta. this is the 
     function that we draw values from for the actual otu. the remaining entries
     are the parameters for that function in order that the function requires 
     them.
    """
    num_otus = len(mu_mat)
    # draw from multivariate normal distribution with specified parameters.
    # transpose so that it remains otuXsample matrix.
    Z = multivariate_normal(mean=mu_mat, cov=rho_mat, size=num_samples).T
    # using the inverse cdf of the normal distribution find where each sample
    # value for each otu falls in the normal cdf.
    U = norm.cdf(Z)
    # make the otu table using the methods and cdf values. ppf_args[0] is the
    # distribution function (eg. lognorm) whose ppf function we will use
    # to transform the cdf vals into the new distribution. ppf_args[1:] is the
    # params of the function like a, b, size, loc etc.
    otu_table = array([
        ppf_args[0].ppf(otu_cdf_vals, *ppf_args[1:])
        for ppf_args, otu_cdf_vals in zip(methods, U)
    ])
    return where(otu_table > 0, otu_table, 0)
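A minimal sketch of a call, assuming the snippet's own imports (numpy.random.multivariate_normal, norm, array, where) and lognormal marginals; all parameter values are illustrative.
import numpy as np
from scipy.stats.distributions import lognorm

rho_mat = np.array([[1.0, 0.7],
                    [0.7, 1.0]])
mu_mat = np.zeros(2)
methods = [[lognorm, 1.0], [lognorm, 0.5]]   # (distribution, shape parameter)
otu_table = copula(num_samples=50, rho_mat=rho_mat, mu_mat=mu_mat, methods=methods)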
Example #22
def param_table(beta, y_name, x_names, sigma=None):
    # basic frame
    frame = pd.DataFrame({
        'coeff': beta,
    }, index=x_names)
    frame = frame.rename_axis(y_name, axis=1)

    # handle sigma cases
    if sigma is None:
        return frame
    elif type(sigma) is tuple:
        sigr, sigc = sigma
        stderr = np.sqrt(np.hstack([maybe_diag(sigr), sigc]))
    else:
        stderr = np.sqrt(maybe_diag(sigma))

    # confidence interval
    low95 = beta - z95 * stderr
    high95 = beta + z95 * stderr

    # p-value
    zscore = beta / stderr
    pvalue = 2 * (1 - norm.cdf(np.abs(zscore)))

    # stderr stats
    frame = frame.assign(stderr=stderr,
                         low95=low95,
                         high95=high95,
                         pvalue=pvalue)

    return frame
def generate_ordinal():

    ## Regression coefficients
    beta = np.zeros(5, dtype=np.float64)
    beta[2] = 1
    beta[4] = -1

    rz = 0.5

    OUT = open("gee_ordinal_1.csv", "w")

    for i in range(200):

        n = np.random.randint(3, 6) # Cluster size

        x = np.random.normal(size=(n,5))
        for j in range(5):
            x[:,j] += np.random.normal()
        pr = np.dot(x, beta)
        pr = np.array([1,0,-0.5]) + pr[:,None]
        pr = 1 / (1 + np.exp(-pr))

        z = rz*np.random.normal() +\
            np.sqrt(1-rz**2)*np.random.normal(size=n)
        u = norm.cdf(z)

        y = (u[:,None] > pr).sum(1)

        for j in range(n):
            OUT.write("%d,%d," % (i, y[j]))
            OUT.write(",".join(["%.3f" % b for b in x[j,:]]) + "\n")

    OUT.close()
Example #24
def copula(num_samples, rho_mat, mu_mat, methods):
    """Copula procedure to generate an OTU table with corrs close to rho_mat.
    Inputs:
     num_samples - int, number of samples. 
     rho_mat - 2d arr, symmetric positive definite matrix which specifies the 
     correlation or covariation between the otu's in the table. 
     mu_mat - 1d arr w/ len(num_otus), mean of otu for multivariate random call.
     methods - list of lists w/ len(num_otus), each list has a variable number 
     of elements. the first element in each list is the 
     scipy.stats.distributions function like lognorm or beta. this is the 
     function that we draw values from for the actual otu. the remaining entries
     are the parameters for that function in order that the function requires 
     them.
    """
    num_otus = len(mu_mat)
    # draw from multivariate normal distribution with specified parameters.
    # transpose so that it remains otuXsample matrix.
    Z = multivariate_normal(mean=mu_mat, cov=rho_mat, size=num_samples).T
    # using the inverse cdf of the normal distribution find where each sample 
    # value for each otu falls in the normal cdf.
    U = norm.cdf(Z)
    # make the otu table using the methods and cdf values. ppf_args[0] is the 
    # distribution function (eg. lognorm) whose ppf function we will use
    # to transform the cdf vals into the new distribution. ppf_args[1:] is the 
    # params of the function like a, b, size, loc etc. 
    otu_table = array([ppf_args[0].ppf(otu_cdf_vals, *ppf_args[1:])
        for ppf_args, otu_cdf_vals in zip(methods, U)])
    return where(otu_table > 0, otu_table, 0)
Example #25
def weightedUtest(g1, w1, g2, w2):
    """ Determines the confidence level of the assertion:
    'The values of g2 are higher than those of g1'.  
    (adapted from the scipy.stats version)
    
    Twist: here the elements of each group have associated weights, 
    corresponding to how often they are present (i.e. two identical entries with 
    weight w are equivalent to one entry with weight 2w).
    Reference: "Studies in Continuous Black-box Optimization", Schaul, 2011 [appendix B].
    
    TODO: make more efficient for large sets. 
    """
    from scipy.stats.distributions import norm
    import numpy

    n1 = sum(w1)
    n2 = sum(w2)
    u1 = 0.
    for x1, wx1 in zip(g1, w1):
        for x2, wx2 in zip(g2, w2):
            if x1 == x2:
                u1 += 0.5 * wx1 * wx2
            elif x1 > x2:
                u1 += wx1 * wx2
    mu = n1 * n2 / 2.
    sigu = numpy.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.)
    z = (u1 - mu) / sigu
    conf = norm.cdf(z)
    return conf 
    def testsolveIWithDispersionMatchesMorrisE0Disp(self):
        """The solveIWithDispersionDimensional method reproduces the results
        of Morris et al 2015.
        """
        time = np.linspace(0, 7, int(7e4)) #1000 pts per second
        time_step = time[1] - time[0]
        num_time_pts = len(time)
        dE = 0
        freq = 0
        Ru = 0
        Cdl = 0.
        Cdl1 = 0.
        Cdl2 = 0.
        Cdl3 = 0.
        EStart = -0.2
        ERev = 0.5
        temp = 293
        nu = 0.1
        area = 1
        coverage = 1e-11
        E_0BinsUnscaled = np.linspace(-17.5e-3, 17.5e-3, 15)
        #Define wE in terms of bin widths
        leftBinEnds = np.empty(15)
        leftBinEnds[1:] = np.linspace(-16.25e-3, 16.25e-3, 14)
        leftBinEnds[0] = -np.inf
        rightBinEnds = np.empty(15)
        rightBinEnds[:-1] = np.linspace(-16.25e-3, 16.25e-3, 14)
        rightBinEnds[-1] = np.inf
        wE = norm.cdf(rightBinEnds, loc=0, scale=5e-3) -\
        norm.cdf(leftBinEnds, loc=0, scale=5e-3)

        k_0Bins = {0.1 : 1.0}
        expWidth = np.array([0.124, 0.126, 0.129, 0.133, 0.138, 0.144, 0.151, 0.159, 0.167, 0.176])

        self.assertEqual(np.sum(wE), 1)

        for l,ew in zip(range(1, 11), expWidth):
            E_0Vals = l * E_0BinsUnscaled
            self.assertTrue(np.isclose(E_0Vals[-1]-E_0Vals[0], l*35.e-3))
            bins = [(E_0, 0.1, we) for E_0, we in zip(E_0Vals, wE)]

            I, amt = st.solve_reaction_disp_dim_bins(time_step, num_time_pts, dE, freq, Ru,
                        Cdl, Cdl1, Cdl2, Cdl3, EStart, ERev, temp, nu, area,
                        coverage, bins)

            width = st.half_maximum_width(I, time, nu)
            self.assertLess(abs(width - ew), 7e-4) #Rounding error + 2*step size + solution error (estimated at 1*step size)
Example #27
    def test_scoretest(self):
        # Regression tests

        np.random.seed(6432)
        n = 200 # Must be divisible by 4
        exog = np.random.normal(size=(n, 4))
        endog = exog[:, 0] + exog[:, 1] + exog[:, 2]
        endog += 3*np.random.normal(size=n)
        group = np.kron(np.arange(n/4), np.ones(4))

        # Test under the null.
        L = np.array([[1., -1, 0, 0]])
        R = np.array([0.,])
        family = Gaussian()
        va = Independence()
        mod1 = GEE(endog, exog, group, family=family,
                  cov_struct=va, constraint=(L, R))
        rslt1 = mod1.fit()
        assert_almost_equal(mod1.score_test_results["statistic"],
                            1.08126334)
        assert_almost_equal(mod1.score_test_results["p-value"],
                            0.2984151086)

        # Test under the alternative.
        L = np.array([[1., -1, 0, 0]])
        R = np.array([1.0,])
        family = Gaussian()
        va = Independence()
        mod2 = GEE(endog, exog, group, family=family,
                   cov_struct=va, constraint=(L, R))
        rslt2 = mod2.fit()
        assert_almost_equal(mod2.score_test_results["statistic"],
                            3.491110965)
        assert_almost_equal(mod2.score_test_results["p-value"],
                            0.0616991659)

        # Compare to Wald tests
        exog = np.random.normal(size=(n, 2))
        L = np.array([[1, -1]])
        R = np.array([0.])
        f = np.r_[1, -1]
        for i in range(10):
            endog = exog[:, 0] + (0.5 + i/10.)*exog[:, 1] +\
                    np.random.normal(size=n)
            family = Gaussian()
            va = Independence()
            mod0 = GEE(endog, exog, group, family=family,
                       cov_struct=va)
            rslt0 = mod0.fit()
            family = Gaussian()
            va = Independence()
            mod1 = GEE(endog, exog, group, family=family,
                       cov_struct=va, constraint=(L, R))
            rslt1 = mod1.fit()
            se = np.sqrt(np.dot(f, np.dot(rslt0.cov_params(), f)))
            wald_z = np.dot(f, rslt0.params) / se
            wald_p = 2*norm.cdf(-np.abs(wald_z))
            score_p = mod1.score_test_results["p-value"]
            assert_array_less(np.abs(wald_p - score_p), 0.02)
Example #28
    def test_scoretest(self):
        # Regression tests

        np.random.seed(6432)
        n = 200 # Must be divisible by 4
        exog = np.random.normal(size=(n, 4))
        endog = exog[:, 0] + exog[:, 1] + exog[:, 2]
        endog += 3*np.random.normal(size=n)
        group = np.kron(np.arange(n/4), np.ones(4))

        # Test under the null.
        L = np.array([[1., -1, 0, 0]])
        R = np.array([0.,])
        family = Gaussian()
        va = Independence()
        mod1 = GEE(endog, exog, group, family=family,
                  cov_struct=va, constraint=(L, R))
        rslt1 = mod1.fit()
        assert_almost_equal(mod1.score_test_results["statistic"],
                            1.08126334)
        assert_almost_equal(mod1.score_test_results["p-value"],
                            0.2984151086)

        # Test under the alternative.
        L = np.array([[1., -1, 0, 0]])
        R = np.array([1.0,])
        family = Gaussian()
        va = Independence()
        mod2 = GEE(endog, exog, group, family=family,
                   cov_struct=va, constraint=(L, R))
        rslt2 = mod2.fit()
        assert_almost_equal(mod2.score_test_results["statistic"],
                            3.491110965)
        assert_almost_equal(mod2.score_test_results["p-value"],
                            0.0616991659)

        # Compare to Wald tests
        exog = np.random.normal(size=(n, 2))
        L = np.array([[1, -1]])
        R = np.array([0.])
        f = np.r_[1, -1]
        for i in range(10):
            endog = exog[:, 0] + (0.5 + i/10.)*exog[:, 1] +\
                    np.random.normal(size=n)
            family = Gaussian()
            va = Independence()
            mod0 = GEE(endog, exog, group, family=family,
                       cov_struct=va)
            rslt0 = mod0.fit()
            family = Gaussian()
            va = Independence()
            mod1 = GEE(endog, exog, group, family=family,
                       cov_struct=va, constraint=(L, R))
            rslt1 = mod1.fit()
            se = np.sqrt(np.dot(f, np.dot(rslt0.cov_params(), f)))
            wald_z = np.dot(f, rslt0.params) / se
            wald_p = 2*norm.cdf(-np.abs(wald_z))
            score_p = mod1.score_test_results["p-value"]
            assert_array_less(np.abs(wald_p - score_p), 0.02)
def calculateProbability(time, avg, sd):
    if sd == pd.to_timedelta(0):
        print(
            "Standard deviation is 0, not enough data points, returning p = 0")
        p = 0
    else:
        z = (time - avg) / sd
        p = norm.cdf(z)
    return p
Example #30
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    Examples
    --------

    >>> from scipy import stats
    >>> a = [1, 2, 3, 4, 5, 6, 7]
    >>> b = [8, 9, 10, 11, 12, 13, 14]
    >>> stats.mstats.compare_medians_ms(a, b, axis=None)
    1.0693225866553746e-05

    The function is vectorized to compute along a given axis.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> x = rng.random(size=(3, 7))
    >>> y = rng.random(size=(3, 8))
    >>> stats.mstats.compare_medians_ms(x, y, axis=1)
    array([0.36908985, 0.36092538, 0.2765313 ])

    References
    ----------
    .. [1] McKean, Joseph W., and Ronald M. Schrader. "A comparison of methods
       for studentizing the sample median." Communications in
       Statistics-Simulation and Computation 13.6 (1984): 751-773.

    """
    (med_1, med_2) = (ma.median(group_1,
                                axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
Example #31
def get_candidate_window2(read, x, y, repx, repy, threshold):
    # using PHI = 1e6 to prescreen the genome 
    PHI = 1e6
    GAMMA_HAT = 1.0
    tau = numpy.sqrt( y*( repx*x*(PHI+y) + repy*y*(PHI+x))/repx/repy/PHI/x**3)
    gamma = y/x
    z = (numpy.log(gamma)-numpy.log(GAMMA_HAT))*gamma/tau
    pvalue = norm.cdf(-z)
    pre_idx_list = numpy.where(pvalue[10:-10]<threshold)[0]+10
    return numpy.array(pre_idx_list)
Example #32
def logrank_power(n, surv1, surv2, alpha=0.05):
    d = n * (2 - surv1 - surv2)
    if surv1 == 1 or surv2 == 1:
        return 0
    elif surv1 == 0 or surv2 == 0:
        return -1
    phi = log(surv1) / log(surv2) if surv1 < surv2 else log(surv2) / log(surv1)
    z_a = norm.ppf(1 - alpha)
    z_1_beta = sqrt(d * (1 - phi) * (1 - phi) / (1 + phi) / (1 + phi)) - z_a
    return norm.cdf(z_1_beta)
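Illustrative call (assumes the snippet's log, sqrt and norm imports): power of a log-rank test with 100 subjects per comparison and survival proportions of 0.60 vs. 0.75.
power = logrank_power(100, 0.60, 0.75, alpha=0.05)
print(round(power, 3))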
    def loss_function_simple(abg, visualize, fig_tag = ''):
        error = .0;
        if visualize:
            figure()
                        
        for (phi_m, phi_idx) in zip(phis, range(N_phi)):
            Is = bins[phi_m]['Is']
            uniqueIs = bins[phi_m]['unique_Is']
             
            a,b,g = abg[0], abg[1], abg[2]
            movingThreshold = getMovingThreshold(a,g, phi_m,binnedTrain.theta)
            
            LHS_numerator = movingThreshold(uniqueIs[1:]) *sqrt(2.)
            LHS_denominator = b * sqrt(1 - exp(-2*uniqueIs[1:]))
            LHS = 1 -  norm.cdf(LHS_numerator / LHS_denominator)
            
            RHS = zeros_like(LHS)
            N  = len(Is)
            for rhs_idx in range(1, len(uniqueIs)):
                t = uniqueIs[rhs_idx]
                lIs = Is[Is<t]
                taus = t - lIs;
                
                numerator = (movingThreshold(t) - movingThreshold(lIs)* exp(-taus)) * sqrt(2.)
                denominator = b *  sqrt(1. - exp(-2*taus))
                RHS[rhs_idx-1] = sum(1. - norm.cdf(numerator/denominator)) / N
            
            weight = len(Is)
            lerror = dot((LHS - RHS)**2 , diff(uniqueIs)) * weight;
            error += lerror
        
            if visualize:
                    subplot(int(ceil(len(phis)/2)), 2, phi_idx+1)
                    ts = uniqueIs[1:]; 
                    plot(ts, LHS, 'b');
                    plot(ts, RHS, 'rx');
#                    annotate('$\phi$ = %.2g'%(phi_m), ((min(ts), max(LHS)/2.)), ) 
                    annotate('lerror = %.3g'%lerror,((min(ts), max(LHS)/2.)), ) 
        if visualize:
            subplot(int(ceil(len(phis)/2)), 2, 1)
            title(fig_tag)          
        return error
 def RHS(ts):
     if not iterable(ts):
         ts = [ts]
     rhs = empty_like(ts)
     for t, t_indx in zip(ts, range(size(ts))):
         lIs = Is[Is<t];
         taus = t - lIs;
         numerator = (movingThreshold(t) - movingThreshold(lIs)* exp(-taus)) * sqrt(2.)
         denominator = b *  sqrt(1. - exp(-2*taus))
         rhs[t_indx] = sum(1. - norm.cdf(numerator/denominator)) / N
     return rhs
Example #35
def get_candidate_window2(x, y, repx, repy, threshold):
    # using PHI = 1e6 to prescreen the genome
    PHI = 1e6
    GAMMA_HAT = 1.0
    tau = numpy.sqrt(y * (repx * x * (PHI + y) + repy * y * (PHI + x)) / repx /
                     repy / PHI / x**3)
    gamma = y / x
    z = (numpy.log(gamma) - numpy.log(GAMMA_HAT)) * gamma / tau
    pvalue = norm.cdf(-z)
    pre_idx_list = numpy.where(pvalue[10:-10] < threshold)[0] + 10
    return numpy.array(pre_idx_list)
Example #36
def per_chr_nbtest(read_array, chr, swap, threshold, peaktype, difftest,
                   start1, end1, start2, end2, test_rep, control_rep):
    t1 = time.time()
    sig_peaks_list = []
    y_bar_array = numpy.mean(read_array[:, start1:end1], 1)
    x_bar_array = numpy.mean(read_array[:, start2:end2], 1)
    if swap:  #swap the chip and control reads.
        x_bar_array, y_bar_array = y_bar_array, x_bar_array
    cand_index = get_candidate_window2(x_bar_array, y_bar_array, control_rep,
                                       test_rep, threshold)
    debug("There are %d candidate windows for %s (PID:%d)", len(cand_index),
          chr, os.getpid())
    if not swap:
        disp_list = numpy.array([
            estimate_area_dispersion_factor(read_array, test_rep, control_rep,
                                            idx, peaktype, difftest)
            for idx in cand_index
        ])
    else:
        disp_list = numpy.array([
            estimate_area_dispersion_factor(read_array, control_rep, test_rep,
                                            idx, peaktype, difftest)
            for idx in cand_index
        ])
    #debug("finished estimating dispersion for %s", chr)

    # return []
    cand_x_bar_array = x_bar_array[cand_index]
    cand_y_bar_array = y_bar_array[cand_index]
    gamma_array = cand_y_bar_array / cand_x_bar_array
    tau_hat_array = numpy.sqrt(
        cand_y_bar_array *
        ((control_rep * cand_x_bar_array * (disp_list + cand_y_bar_array)) +
         (test_rep * cand_y_bar_array * (disp_list + cand_x_bar_array))) /
        (test_rep * control_rep * disp_list * (cand_x_bar_array**3)))

    gamma_hat = 1.0  #Null hypothesis
    z_score_array = ((numpy.log(gamma_array) - numpy.log(gamma_hat)) *
                     gamma_array / tau_hat_array)
    pval_array = norm.cdf(-z_score_array)
    test_index = numpy.where(pval_array < threshold)
    test_index = test_index[0]
    sig_index = cand_index[test_index]
    sig_pval = pval_array[test_index]
    sig_group1_count = cand_y_bar_array[test_index]
    sig_group2_count = cand_x_bar_array[test_index]
    #sig_disp = disp_list[test_index]
    for i, a in enumerate(test_index):
        sig_peaks_list.append(
            Peak(chr, sig_index[i], sig_group1_count[i], sig_group2_count[i],
                 sig_pval[i], 0))
    t2 = time.time()
    debug("Analysis finished for %s, used %f sec CPU time", chr, t2 - t1)
    return sig_peaks_list
Example #37
def tst_importance_sampl():
    """
	Reducing variance using importance sampling.
	"""
    print("Probability that std normal will be greater than 2 is:" +
          str((1 - norm.cdf(2, 0, 1))))
    print("What we get from direct simulation:" +
          str(sum(np.random.normal(0, 1, size=10000) > 2) / 10000))
    summ = 0
    for x in np.random.normal(2, 1, size=10000):
        summ += (x > 2) * norm.pdf(x, 0, 1) / norm.pdf(x, 2, 1)
    print("With importance sampling:" + str(summ / 10000))
 def RHS(ts):
     if not iterable(ts):
         ts =  array([ts])
     lIs = tile(Is,  len(ts) ).reshape((len(ts), len(Is))).transpose()
     lts = tile(ts, (len(Is),1 ) )
     mask = lIs < lts
     taus = (lts - lIs); #*mask
     #NOTE BELOW WE use abs(taus) since for non-positive taus we will mask away anyway:
     numerator = (movingThreshold(lts) - movingThreshold(lIs)* exp(-abs(taus))) * sqrt(2.)
     denominator = b *  sqrt(1. - exp(-2*abs(taus)))
     
     rhs = sum( (1. - norm.cdf(numerator/denominator))*mask, axis=0) / N_Is
     return rhs
Example #39
def filterExptsByPseudoCountDistr( ddict ):
    
    # remove experiments where the pseudocount is high
    # relative to the other pseudocounts
    pseudodict     = { k : ddict[k]['PSEUDO'] for k in ddict }
    pskeys         = list(pseudodict.keys())
    pslogvals      = np.log10(list(pseudodict.values()))
    pslogmad       = mad(pslogvals) ; 
    pslogmedian    = np.percentile(pslogvals,50)
    pslvps_hi      = 1-norm.cdf((pslogvals-pslogmedian)/pslogmad)
    rejected_ds_hi = multipletests( pslvps_hi, alpha=0.05 )[0]

    # return data in a dictionary
    filteredExpts  = {  pskeys[i] : rejected_ds_hi[i] for i in range(len(pskeys))}
    return filteredExpts
def dosim(hyp, cov_struct=None, mcrep=500):

    # Storage for the simulation results
    scales = [[], []]

    # P-values from the score test
    pv = []

    # Monte Carlo loop
    for k in range(mcrep):

        # Generate random "probability points" u  that are uniformly
        # distributed, and correlated within clusters
        z = np.random.normal(size=n)
        u = np.random.normal(size=n // m)
        u = np.kron(u, np.ones(m))
        z = r * z + np.sqrt(1 - r**2) * u
        u = norm.cdf(z)

        # Generate the observed responses
        y = negbinom(u, mu=mu[hyp], scale=scale)

        # Fit the null model
        m0 = sm.GEE(y,
                    x0,
                    groups=grp,
                    cov_struct=cov_struct,
                    family=sm.families.Poisson())
        r0 = m0.fit(scale='X2')
        scales[0].append(r0.scale)

        # Fit the alternative model
        m1 = sm.GEE(y,
                    x,
                    groups=grp,
                    cov_struct=cov_struct,
                    family=sm.families.Poisson())
        r1 = m1.fit(scale='X2')
        scales[1].append(r1.scale)

        # Carry out the score test
        st = m1.compare_score_test(r0)
        pv.append(st["p-value"])

    pv = np.asarray(pv)
    rslt = [np.mean(pv), np.mean(pv < 0.1)]

    return rslt, scales
def estimate_params_for_normal(x, low_bound , mu_initial, sigma_initial):
	"""
		Takes a vector x of truncated data with a known lower
		truncation bound and estimates the parameters of the 
		fit of an untruncated normal distribution.
		code from Chris Fonnesbeck's Python data analysis tutorial on Sense
		https://sense.io/prometheus2305/data-analysis-in-python/files/Statistical%20Data%20Modeling.py
	"""


	# normalize vector
	mu_initial = float(mu_initial)
	sigma_initial = float(sigma_initial)
	#x = np.random.normal(size=10000,loc=2000,scale= 2000)

	x = (np.asarray(x, dtype=float) - mu_initial) / sigma_initial
	a =  (low_bound - mu_initial)/sigma_initial # normalize lower bound
	

	#_ = plt.hist(x, bins=100)
	#plt.show()
	#plt.close()

	# We can construct a log likelihood for this function using the conditional
	# form	
	trunc_norm = lambda theta, a, x: -(np.log(norm.pdf(x, theta[0], theta[1])) - 
	                                      np.log(1 - norm.cdf(a, theta[0], theta[1]))).sum()

	# For this example, we will use another optimization algorithm, the
	# **Nelder-Mead simplex algorithm**. It has a couple of advantages: 
	# 
	# - it does not require derivatives
	# - it can optimize (minimize) a vector of parameters
	# 
	# SciPy implements this algorithm in its `fmin` function:

	# we have normalized data, given that the lower truncation point a
	# is pretty far out in the tail - the standard normal parameters are
	# a first good guess, i.e. 0,1
	initial_guess = np.array([0,1]) 
	sol = fmin(trunc_norm, initial_guess , args=(a, x))
	print(sol)
	mean_normalized,stddev_normalized = sol[0],sol[1]
	mean_est =( 1 + mean_normalized ) * mu_initial
	stddev_est = stddev_normalized * sigma_initial
	print(mean_est, stddev_est)
	return mean_est,stddev_est
Example #42
def estimate_params_for_normal(x, low_bound, mu_initial, sigma_initial):
    """
		Takes a vector x of truncated data with a known lower
		truncation bound and estimates the parameters of the 
		fit of an untruncated normal distribution.
		code from Chris Fonnesbeck's Python data analysis tutorial on Sense
		https://sense.io/prometheus2305/data-analysis-in-python/files/Statistical%20Data%20Modeling.py
	"""

    # normalize vector
    mu_initial = float(mu_initial)
    sigma_initial = float(sigma_initial)
    #x = np.random.normal(size=10000,loc=2000,scale= 2000)

    x = (np.asarray(x, dtype=float) - mu_initial) / sigma_initial
    a = (low_bound - mu_initial) / sigma_initial  # normalize lower bound

    #_ = plt.hist(x, bins=100)
    #plt.show()
    #plt.close()

    # We can construct a log likelihood for this function using the conditional
    # form
    trunc_norm = lambda theta, a, x: -(np.log(norm.pdf(x, theta[0], theta[
        1])) - np.log(1 - norm.cdf(a, theta[0], theta[1]))).sum()

    # For this example, we will use another optimization algorithm, the
    # **Nelder-Mead simplex algorithm**. It has a couple of advantages:
    #
    # - it does not require derivatives
    # - it can optimize (minimize) a vector of parameters
    #
    # SciPy implements this algorithm in its `fmin` function:

    # we have normalized data, given that the lower truncation point a
    # is pretty far out in the tail - the standard normal parameters are
    # a first good guess, i.e. 0,1
    initial_guess = np.array([0, 1])
    sol = fmin(trunc_norm, initial_guess, args=(a, x))
    print(sol)
    mean_normalized, stddev_normalized = sol[0], sol[1]
    mean_est = (1 + mean_normalized) * mu_initial
    stddev_est = stddev_normalized * sigma_initial
    print(mean_est, stddev_est)
    return mean_est, stddev_est
Example #43
def per_chr_nbtest(read_array, chr, swap,threshold, peaktype,difftest, start1,end1,start2,end2,test_rep,control_rep):
    t1 = time.time()
    sig_peaks_list = []
    y_bar_array = numpy.mean(read_array[:, start1:end1], 1)
    x_bar_array = numpy.mean(read_array[:, start2:end2], 1)
    if swap: #swap the chip and control reads.
        x_bar_array, y_bar_array = y_bar_array, x_bar_array
    cand_index = get_candidate_window2( x_bar_array,
                    y_bar_array, control_rep, test_rep, threshold)
    debug("There are %d candidate windows for %s (PID:%d)", len(cand_index), chr, os.getpid())
    if not swap:
        disp_list = numpy.array([estimate_area_dispersion_factor(read_array,
                test_rep, control_rep, idx, peaktype, difftest)
                for idx in cand_index])
    else:
        disp_list = numpy.array([estimate_area_dispersion_factor(read_array,
                control_rep, test_rep, idx, peaktype, difftest)
                for idx in cand_index])
    #debug("finished estimating dispersion for %s", chr)
   # return []
    cand_x_bar_array = x_bar_array[cand_index]
    cand_y_bar_array = y_bar_array[cand_index]
    gamma_array = cand_y_bar_array / cand_x_bar_array
    tau_hat_array = numpy.sqrt(cand_y_bar_array*
            ((control_rep*cand_x_bar_array*(disp_list+cand_y_bar_array)) +
            (test_rep*cand_y_bar_array*(disp_list+cand_x_bar_array)))/
            (test_rep*control_rep*disp_list*(cand_x_bar_array**3)))

    gamma_hat = 1.0 #Null hypothesis
    z_score_array = ((numpy.log(gamma_array)-numpy.log(gamma_hat))*
            gamma_array/tau_hat_array)
    pval_array = norm.cdf(-z_score_array)
    test_index = numpy.where(pval_array<threshold)
    test_index = test_index[0]
    sig_index = cand_index[test_index]
    sig_pval = pval_array[test_index]
    sig_group1_count = cand_y_bar_array[test_index]
    sig_group2_count = cand_x_bar_array[test_index]
    #sig_disp = disp_list[test_index]
    for i, a in enumerate(test_index):
        sig_peaks_list.append(Peak(chr, sig_index[i], sig_group1_count[i], sig_group2_count[i], sig_pval[i], 0))
    t2 = time.time()
    debug ("Analysis finished for %s, used %f sec CPU time", chr, t2-t1)
    return sig_peaks_list
Example #44
def compare_medians_ms(group_1, group_2, axis=None):
    """Compares the medians from two independent groups along the given axis.
    Returns an array of p values.
    The comparison is performed using the McKean-Schrader estimate of the standard
    error of the medians.    
    
:Inputs:
    group_1 : sequence
        First dataset.
    group_2 : sequence
        Second dataset.
    axis : integer *[None]*
        Axis along which the medians are estimated. If None, the arrays are flattened.
    """
    (med_1, med_2) = (mmedian(group_1, axis=axis), mmedian(group_2, axis=axis))
    (std_1, std_2) = (stde_median(group_1, axis=axis), 
                      stde_median(group_2, axis=axis)) 
    W = abs(med_1 - med_2) / sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
Example #45
def param_table(beta, sigma, names):
    # standard errors
    stderr = np.sqrt(sigma.diagonal())

    # confidence interval
    low95 = beta - z95*stderr
    high95 = beta + z95*stderr

    # p-value
    zscore = beta/stderr
    pvalue = 2*(1-norm.cdf(np.abs(zscore)))

    # return all
    return pd.DataFrame({
        'coeff': beta,
        'stderr': stderr,
        'low95': low95,
        'high95': high95,
        'pvalue': pvalue
    }, index=names)
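Illustrative call; z95 is defined elsewhere in the source module, presumably as the two-sided 95% normal critical value, which is assumed here.
import numpy as np
import pandas as pd
from scipy.stats.distributions import norm

z95 = norm.ppf(0.975)                 # assumption: two-sided 95% critical value
beta = np.array([1.2, -0.4])
sigma = np.array([[0.04, 0.00],
                  [0.00, 0.09]])
table = param_table(beta, sigma, names=['x1', 'x2'])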
Example #46
def plotzfe():
    """ Test Simulation 1-2
        Plot theoretical vs. simulated results for zero forcing equalizer."""
    nsample = 10**5
    snrlst = range(0,19)

    # Calculate the theoretical values
    pe = []
    coeff = zero_forcing_coeff(tap2, 41)
    for snr in snrlst:
        delta_square = 10**(-snr/10.)*sum(coeff**2)
        pe.append(1-norm.cdf(sqrt((1-0.41)**2/delta_square)))
    
    plt.semilogy(snrlst,pe,snrlst,equalizer(2,41,snrlst,nsample,'zfir'),"-.")
    plt.legend(("Theoretical curve", "41 taps simulation"), loc='lower left')
    plt.title("Theoretical vs. Simulated performances")
    plt.xlabel("SNR (dB)")
    plt.ylabel("SER (dB)")
    plt.grid(True, which='both')
    plt.show()
Example #47
def plotdfe2():
    nsample = 10**5
    snrlst = range(0,19)
    nzf = 41
    tap = tap2
    
    pe = []
    for snr in snrlst:
        cj,_ = dfe_coeff(tap, nzf, 41, snr)
        f = [0]*(len(cj)-len(tap)) + list(tap)[::-1]
        jmin = 1-sum(np.array(f)*cj)
        gamma = (1-jmin)/jmin
        pe.append(1-norm.cdf(sqrt(gamma)))
    plt.semilogy(snrlst,pe,snrlst, equalizer(2, 41, snrlst, nsample, 'dfe'), "-.")
    plt.legend(("Theoretical curve", "Simulated curve"), loc='lower left')
    plt.title("Theoretical vs. Simulated performances for Channel 1")
    plt.xlabel("SNR (dB)")
    plt.ylabel("SER (dB)")
    plt.grid(True, which='both')
    plt.show()
Example #48
def kurtosistest(a, axis=-1):
    """Tests whether a dataset has normal kurtosis

    That is, test whether kurtosis=3(n-1)/(n+1). Valid only for n>20.  Axis
    can equal None (ravel array first), an integer (the axis over which to
    operate), or a sequence (operate over multiple axes).

    NOTE: This function is mostly copied from scipy.stats.stats, but
    corrects for a major bug: the pvalue returned by SciPy is not valid
    when the kurtosis is negative! The return values also are slightly
    different: the kurtosis is actually returned while the z-score is not.

    Returns: kurtosis and 2-tail z-probability.
    """
    a, axis = _chk_asarray(a, axis)
    n = float(a.shape[axis])
    if n < 20:
        print "kurtosistest only valid for n>=20 ... continuing anyway, n=", n
    kurt = kurtosis(a, axis)
    E = 3.0 * (n - 1) / (n + 1)
    varkurt = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) *
                                              (n + 5))
    x = (kurt - E) / sqrt(varkurt)
    sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * sqrt(
        (6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3)))
    A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 + sqrt(1 + 4.0 /
                                                        (sqrtbeta1**2)))
    term1 = 1 - 2 / (9.0 * A)
    denom = 1 + x * sqrt(2 / (A - 4.0))
    denom = where(less(denom, 0), 99, denom)
    term2 = where(equal(denom, 0), term1, power((1 - 2.0 / A) / denom,
                                                1 / 3.0))
    Z = (term1 - term2) / sqrt(2 / (9.0 * A))
    Z = where(equal(denom, 99), 0, Z)

    # The two-tailed p-value is twice the prob that value of a std normal r.v.
    # turns out to be greater than the (absolute) value of Z
    pvalue = 2 * (1 - norm.cdf(abs(Z)))
    assert pvalue >= 0.0 and pvalue <= 1.0
    return kurt, pvalue
Example #49
def autocorrelation(series, k=1, biased=True):
    """Returns autocorrelation of order 'k' and corresponding two-tailed pvalue.

    (Inspired by CLM pp.45-47)

    @param series: The series on which to compute autocorrelation
    @param k:      The order to which compute autocorrelation
    @param biased: If False, rho_k will be corrected according to Fuller (1976)

    @return: rho_k, pvalue
    """
    T = len(series)
    mu = mean(series)
    sigma = var(series)

    # Centered observations
    obs = series - mu
    lagged = lag(obs, k)
    truncated = obs[:-k]
    assert len(lagged) == len(truncated)

    # Multiplied by 'T' for numerical stability
    gamma_k = T * add.reduce(truncated * lagged)  # Numerator
    gamma_0 = T * add.reduce(obs * obs)  # Denominator
    rho_k = (gamma_k / gamma_0)
    if rho_k > 1.0: rho_k = 1.0  # Correct for numerical errors

    # The standard normal random variable
    Z = sqrt(T) * rho_k

    # Bias correction?
    if not biased:
        rho_k += (1 - rho_k**2) * (T - k) / (T - 1)**2
        Z = rho_k * T / sqrt(T - k)

    # The two-tailed p-value is twice the prob that value of a std normal r.v.
    # turns out to be greater than the (absolute) value of Z
    pvalue = 2 * (1 - norm.cdf(abs(Z)))
    assert pvalue >= 0.0 and pvalue <= 1.0
    return rho_k, pvalue
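# Hedged usage sketch for the autocorrelation above (assumes numpy is imported as
# np and that mean, var, add, sqrt, norm and the lag() helper used by the function
# are available in this module): for i.i.d. noise the lag-1 autocorrelation should
# be near zero and the p-value typically well above 0.05.
np.random.seed(1)
noise = np.random.normal(size=500)
rho1, pval = autocorrelation(noise, k=1)
print("rho_1=%.3f, two-tailed p=%.3f" % (rho1, pval))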
def generate_logistic():

    # Number of clusters
    nclust = 100

    # Regression coefficients
    beta = np.array([1, -2, 1], dtype=np.float64)

    ## Covariate correlations
    r = 0.4

    ## Cluster effects of covariates
    rx = 0.5

    ## Within-cluster outcome dependence
    re = 0.3

    p = len(beta)

    OUT = open("gee_logistic_1.csv", "w")

    for i in range(nclust):

        n = np.random.randint(3, 6)  # Cluster size

        x = np.random.normal(size=(n, p))
        x = rx * np.random.normal() + np.sqrt(1 - rx**2) * x
        x[:, 2] = r * x[:, 1] + np.sqrt(1 - r**2) * x[:, 2]
        pr = 1 / (1 + np.exp(-np.dot(x, beta)))
        z = re*np.random.normal() +\
            np.sqrt(1-re**2)*np.random.normal(size=n)
        u = norm.cdf(z)
        y = 1 * (u < pr)

        for j in range(n):
            OUT.write("%d,%d," % (i, y[j]))
            OUT.write(",".join(["%.3f" % b for b in x[j, :]]) + "\n")

    OUT.close()
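# Minimal sketch (my reading of the trick above) of how within-cluster dependence
# is induced: an equicorrelated Gaussian vector is pushed through the normal CDF,
# giving dependent Uniform(0,1) draws, which are then thresholded against the
# per-observation success probabilities. The helper name is illustrative only.
import numpy as np
from scipy.stats import norm

def correlated_bernoulli(pr, re):
    n = len(pr)
    z = re*np.random.normal() + np.sqrt(1 - re**2)*np.random.normal(size=n)
    u = norm.cdf(z)      # each u[j] is Uniform(0,1) marginally, but the u's are dependent
    return 1*(u < pr)    # Bernoulli(pr[j]) marginally, correlated within the cluster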
Example #54
def variance_ratio(series, q, rw_hypothesis=1):
    """Returns 'VR(q)' and the corresponding pvalue.

    VR(q) here refers to the variance ratio suggested in Campbell, Lo and
    MacKinlay (1997), pp.49-57.

    @param series:        The series on which to compute VR.
    @param q:             Number of periods of the long-horizon return in VR
    @param rw_hypothesis: Which null hypothesis to test against. The value
      must be in [0, 1, 3]. Zero is a special value under which no pvalue is
      reported. One and three lead to the use of RW1 and RW3.

    @return: VR_q [, pvalue -- if rw_hypothesis!=0 ]
    """
    assert q > 1
    T = len(series)
    qf = float(q)
    VR_q = 1.0
    for k in range(1, q):  # Will sum till q-1 as desired
        # Use the float qf so the weight (1 - k/q) is not silently truncated
        # to zero by integer division under Python 2.
        VR_q += 2.0 * (1.0 - (k / qf)) * autocorrelation(series, k, biased=True)[0]

    # Zero is a special value under which no pvalue is reported
    if rw_hypothesis == 0:
        return VR_q

    # To be replaced by n*q in the version given on pp.52-55...
    nq = float(T - 1)
    Z = sqrt(nq) * (VR_q - 1.0)

    if rw_hypothesis == 1:
        Z /= sqrt(2.0 * (2 * q - 1) * (q - 1) / (3.0 * q))
        return VR_q, 2 * (1 - norm.cdf(abs(Z)))

    if rw_hypothesis == 3:
        raise NotImplementedError
    raise ValueError("'rw_hypothesis' must be in [0,1,3].")
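# Hedged usage sketch for the variance_ratio above (assumes numpy is imported as
# np and that sqrt, norm and the autocorrelation function defined earlier are
# available in this module): the increments of a pure random walk should give
# VR(q) close to 1 and a large RW1 p-value.
np.random.seed(2)
returns = np.random.normal(size=1000)   # i.i.d. increments of a random walk
vr2, pval = variance_ratio(returns, q=2, rw_hypothesis=1)
print("VR(2)=%.3f, two-tailed p=%.3f" % (vr2, pval))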
def generate_nominal():

    ## Regression coefficients
    beta1 = np.r_[0.5, 0.5]
    beta2 = np.r_[-1, -0.5]
    p = len(beta1)

    rz = 0.5

    OUT = open("gee_nominal_1.csv", "w")

    for i in range(200):

        n = np.random.randint(3, 6) # Cluster size

        x = np.random.normal(size=(n,p))
        x[:,0] = 1
        for j in range(1,x.shape[1]):
            x[:,j] += np.random.normal()
        pr1 = np.exp(np.dot(x, beta1))[:,None]
        pr2 = np.exp(np.dot(x, beta2))[:,None]
        den = 1 + pr1 + pr2
        pr = np.hstack((pr1/den, pr2/den, 1/den))
        cpr = np.cumsum(pr, 1)

        z = rz*np.random.normal() +\
            np.sqrt(1-rz**2)*np.random.normal(size=n)
        u = norm.cdf(z)

        y = (u[:,None] > cpr).sum(1)

        for j in range(n):
            OUT.write("%d,%d," % (i, y[j]))
            OUT.write(",".join(["%.3f" % b for b in x[j,:]]) + "\n")

    OUT.close()
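# Minimal sketch (assumed interpretation) of the category draw above: comparing a
# Uniform(0,1) value against the row of cumulative category probabilities and
# counting how many cut points it exceeds is inverse-CDF sampling of the nominal
# outcome. The helper name below is illustrative only.
import numpy as np

def draw_categories(pr, u):
    cpr = np.cumsum(pr, 1)            # per-observation cumulative probabilities
    return (u[:, None] > cpr).sum(1)  # 0-based index of the sampled category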