Example #1
File: stats.py Project: apaloczy/ap_tools
import numpy as np
from scipy.stats import norm

def ci_mean(m_sample, s_sample, ndof_eff, alpha=0.95, verbose=True):
	"""
	Calculates a confidence interval at the 'alpha' confidence level
	for the sample mean of a normally-distributed random variable with
	sample mean 'm_sample', sample standard deviation 's_sample' and
	effective degrees of freedom 'ndof_eff'.

	References
	----------
	TODO

	Example
	-------
	TODO
	"""
	# z-score associated with the given confidence level 'alpha'.
	# The standard normal curve is symmetric about its mean (0), so
	# only the upper bound of the interval is needed.
	zs = norm.interval(alpha)[1]
	# Lower and upper bounds of the 100*alpha % confidence interval.
	std_err = s_sample/np.sqrt(ndof_eff)
	xl = m_sample - zs*std_err
	xu = m_sample + zs*std_err

	if verbose:
		print("")
		print("Sample mean CI (xl,xu): (%.3f,%.3f)"%(xl, xu))
		print("")

	return (xl,xu)
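A minimal usage sketch (the sample statistics below are made-up values for illustration, assuming the imports above):

m, s, ndof = 2.5, 1.2, 100   # hypothetical sample mean, std. deviation and effective DOF
xl, xu = ci_mean(m, s, ndof, alpha=0.95, verbose=False)
# zs ~ 1.96 and std_err = 1.2/sqrt(100) = 0.12, so (xl, xu) ~ (2.265, 2.735).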
Example #2
File: stats.py Project: apaloczy/ap_tools
import numpy as np
from scipy.stats import norm

def rci_fisher(r, ndof_eff, alpha=0.95, verbose=True):
	"""
	Calculate a confidence interval for the Pearson correlation coefficient r
	between two series 'x' and 'y' using Fisher's z-transformation method.

	References
	----------
	Cox (2008): Speaking Stata: Correlation with confidence, or Fisher’s z revisited.
	The Stata Journal, 8(3), pp. 413-439.

	https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient

	Example
	-------
	TODO
	"""
	## OBS: equivalent to the form z_r = 0.5*np.log((1+r)/(1-r)),
	## which is also commonly found in the literature.
	z_r = np.arctanh(r)           # Transform r to z_r, which is approximately normally distributed.
	se_r = 1./np.sqrt(ndof_eff-3) # Standard error of the transformed distribution.

	# z-score associated with the given confidence level 'alpha'.
	# The standard normal curve is symmetric about its mean (0), so
	# only the upper bound of the interval is needed.
	zs = norm.interval(alpha)[1]
	# Lower and upper bounds of the 100*alpha % confidence interval in the z-scale.
	z_xl = z_r - zs*se_r
	z_xu = z_r + zs*se_r

	## Use the inverse transformation to convert intervals
	## in the z-scale back to the r-scale.
	xl = np.tanh(z_xl)
	xu = np.tanh(z_xu)

	if verbose:
		print("")
		print("Fisher transform CI (xl,xu): (%.3f,%.3f)"%(xl, xu))
		print("")

	return (xl,xu)
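A minimal usage sketch (the correlation and effective DOF below are made-up values, assuming the imports above):

r, ndof = 0.6, 50   # hypothetical correlation and effective degrees of freedom
xl, xu = rci_fisher(r, ndof, alpha=0.95, verbose=False)
# z_r = arctanh(0.6) ~ 0.693 and se_r = 1/sqrt(47) ~ 0.146,
# so the interval back in the r-scale is roughly (0.39, 0.75).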
Example #3
                              data=FilteredSystem).fit()
CVEffect_Models = [CV_Model_CVEffect, F_CV_Model_CVEffect]
Datasets = ['Complete', 'Filtered']
SystemFitted = [System2Fit, FilteredSystem]

## Analyze distribution of Ln(CV)
for i in range(2):
    D = SystemFitted[i]['LogCV'].values
    D.sort()
    D_bar = np.mean(D)
    S_D = np.std(D, ddof=1)
    N_D = len(D)

    ## Kernel density estimation (Gaussian kernel, Silverman-type bandwidth)
    KernelEstimator = np.zeros(N_D)
    NormalIQR = np.sum(np.abs(norm.interval(0.5, 0, 1)))  # IQR of the standard normal (~1.349)
    DataIQR = np.quantile(D, 0.75) - np.quantile(D, 0.25)
    KernelHalfWidth = 0.9 * N_D**(-1 / 5) * S_D
    for Value in D:
        KernelEstimator += norm.pdf(D - Value, 0, KernelHalfWidth * 2)
    KernelEstimator = KernelEstimator / N_D

    ## Histogram and density distribution
    TheoreticalDistribution = norm.pdf(D, D_bar, S_D)
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D,
                          density=True,
                          bins=20,
                          edgecolor=(0, 0, 1),
                          color=(1, 1, 1),
                          label='Histogram')
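For reference, the hand-rolled kernel estimator above can be cross-checked against scipy's built-in gaussian_kde. A minimal sketch on synthetic data (the data and seed are made up; only the bandwidth matching is the point):

import numpy as np
from scipy.stats import norm, gaussian_kde

rng = np.random.default_rng(0)
D = np.sort(rng.normal(size=200))           # synthetic stand-in for the LogCV values
h = 0.9 * len(D)**(-1 / 5) * np.std(D, ddof=1)

# Same estimator as above: the average of Gaussian kernels centred on each point.
Manual = np.zeros(len(D))
for Value in D:
    Manual += norm.pdf(D - Value, 0, h * 2)
Manual /= len(D)

# gaussian_kde takes a scalar bandwidth as a multiple of the sample std. deviation,
# so a factor of 2*h/std reproduces the kernel width used above.
Auto = gaussian_kde(D, bw_method=2 * h / np.std(D, ddof=1))(D)
print(np.allclose(Manual, Auto))            # True: the two estimates agree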
Example #4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm

def PermutationTest(x, y, NRepetition=45**2, SignificanceLevel=0.05):

    # Analyze data
    x_bar = np.mean(x)
    y_bar = np.mean(y)

    d = x_bar - y_bar

    XData = pd.DataFrame({'Values':x,'Group':'Control'},index=range(len(x)))
    YData = pd.DataFrame({'Values':y,'Group':'Test'},index=range(len(y)))

    Pool = pd.concat([XData, YData], ignore_index=True)
    N = len(Pool)

    D = np.zeros(NRepetition)
    for i in range(NRepetition):

        n = len(x)  # keep the original group sizes when permuting labels

        SampleA = Pool.sample(n)
        SampleB = Pool.drop(SampleA.index)

        D[i] = SampleA['Values'].mean() - SampleB['Values'].mean()

    # Analyze the distribution of D
    D.sort()
    D_bar = np.mean(D)
    S_D = np.std(D,ddof=1)
    N_D = len(D)

    # Kernel density estimation (Gaussian kernel)
    KernelEstimator = np.zeros(N_D)
    NormalIQR = np.sum(np.abs(norm.interval(0.5,0,1)))  # IQR of the standard normal (~1.349)
    DataIQR = np.quantile(D,0.75) - np.quantile(D,0.25)
    KernelHalfWidth = 0.9 * N_D ** (-1 / 5) * S_D
    for Value in D:
        KernelEstimator += norm.pdf(D - Value, 0, KernelHalfWidth * 2)
    KernelEstimator = KernelEstimator / N_D

    ## Histogram and density distribution
    TheoreticalDistribution = norm.pdf(D, D_bar, S_D)
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D, density=True, bins=20, edgecolor=(0, 0, 1), color=(1, 1, 1), label='Histogram')
    Axes.plot(D, KernelEstimator, color=(1, 0, 0), label='Kernel Density')
    Axes.plot(D, TheoreticalDistribution, linestyle='--', color=(0, 0, 0), label='Normal Distribution')
    plt.xlabel('D values')
    plt.ylabel('Density (-)')
    plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15), prop={'size':10})
    plt.show()
    plt.close(Figure)

    EmpiricalQuantiles = np.arange(0.5, N_D + 0.5) / N_D
    MinValue = np.quantile(D,SignificanceLevel / 2)
    MaxValue = np.quantile(D,1 - SignificanceLevel / 2)
    RejectionRange = np.array([[-np.inf,MinValue],[MaxValue,np.inf]])

    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D, density=True, bins=20, edgecolor=(0, 0, 1), color=(1, 1, 1), label='Histogram')
    Axes.fill_between([min(D),MinValue], [max(Histogram[0]),max(Histogram[0])], color=(0, 0, 0), alpha=0.1)
    Axes.fill_between([MaxValue,max(D)], [max(Histogram[0]),max(Histogram[0])], color=(0, 0, 0), alpha=0.1, label='Rejection range')
    Axes.plot([d,d], [0,max(Histogram[0])], color=(1, 0, 0), label='Actual difference')
    plt.xlabel('D values')
    plt.ylabel('Density (-)')
    plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15), prop={'size':10})
    plt.show()
    plt.close(Figure)

    p = len(D[abs(D)>=abs(d)]) / len(D)

    # Z = (D - D_bar) / S_D
    # z_d = (d - D_bar) / S_D
    # TheoreticalQuantiles = norm.cdf(Z)
    #
    # # Compute range of Z values
    # D_zmin = D_bar - 10 * S_D
    # D_zmax = D_bar + 10 * S_D
    #
    # Step = 0.001
    # x = np.arange(D.min(), D.max(), Step)  # range of x in spec
    # y = norm.pdf(x, D_bar, S_D)
    #
    # x_all = np.arange(D_zmin, D_zmax, Step)  # entire range of x, both in and out of spec
    # # y_all = norm.pdf(x_all, D_bar, S_D)
    #
    # x_all = np.arange(-10, 10, Step)  # entire range of x, both in and out of spec
    # y_all = norm.pdf(x_all, 0, 1)
    # y_d = norm.pdf(z_d, 0, 1)
    #
    # y_sorted = np.zeros(len(y_all))
    # y_sorted += y_all
    # y_sorted.sort()
    #
    # CI = 0.95
    # y_area = 0
    # i = 1
    # while y_area / y_all.sum() < CI:
    #     y_area += y_sorted[-i]
    #     i += 1
    # z_CI = i / 2 * Step
    #
    # # Entire range of x, both in and out of spec
    # x_CI = np.arange(-z_CI, z_CI, Step)
    # y_CI = norm.pdf(x_CI, 0, 1)
    #
    # # Plot in data space
    # Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    # Axes.fill_between(x_CI, y_CI, 0, alpha=0.15, color=(0, 0, 0), label=str(int(0.95 * 100)) + '% CI')
    # Axes.plot([z_d,z_d], [0,y_d], color=(0, 0, 1), label='Difference Observed')
    # Axes.plot(x_all, y_all, color=(1, 0, 0), label='Normal distribution')
    # Axes.set_xlabel('Z values')
    # # plt.xlim([D_bar - 4.2 * S_D, D_bar + 4.2 * S_D])
    # plt.xlim([-5, 5])
    # plt.ylim([0, 0.45])
    # plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
    # # plt.show()
    # plt.close()
    #
    # # Plot in data space
    # d_CI = (z_CI + D_bar) * S_D
    # dx_CI = np.arange(-d_CI, d_CI, Step)
    # dy_CI = norm.pdf(dx_CI, D_bar, S_D)
    # d_y = norm.pdf(d, D_bar, S_D)
    #
    # Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    # Axes.fill_between(dx_CI, dy_CI, 0, alpha=0.15, color=(0, 0, 0), label=str(int(0.95 * 100)) + '% CI')
    # Axes.plot([d, d], [0, d_y], color=(0, 0, 1), label='Difference Observed')
    # Axes.plot(D, TheoreticalDistribution, color=(1, 0, 0), label='Normal distribution')
    # Axes.set_xlabel('D values')
    # plt.ylim([0,max(TheoreticalDistribution)*1.05])
    # plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
    # plt.show()

    return d, RejectionRange, p
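A minimal usage sketch (synthetic samples with a shifted mean; sizes and seed are made up for illustration):

rng = np.random.default_rng(42)
x = rng.normal(0.0, 1.0, 45)   # hypothetical control sample
y = rng.normal(0.5, 1.0, 45)   # hypothetical test sample
d, RejectionRange, p = PermutationTest(x, y, NRepetition=2000)
print('Observed difference: %.3f, p = %.3f' % (d, p))
# The difference is significant at the chosen level roughly when d falls
# inside RejectionRange, i.e. when p < SignificanceLevel.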