def confidence_interval(data, percentile, number_of_tails):
    """
    Computes a confidence interval for the mean of the given data covering
    the probability given by the variable percentile.

    * data specifies the given data.
    * percentile gives the probability that the mean of the data is in the
      confidence interval.
    * number_of_tails gives the number of tails of the normal distribution
      to be considered (1 or 2).

    Returns:
    * for number_of_tails == 2, the tuple (left, right) built as
      mean -/+ c*std_dev/sqrt(len(data));
    * for number_of_tails == 1, only the upper bound
      mean + c*SEM/sqrt(len(data));
    * for any other value of number_of_tails, None.

    c is the value returned by distributions.find_percentile for the cdf of
    distributions.NormalDistribution(0, 1).
    """
    root_n = math.pow(len(data), 0.5)
    mean = statchar.aritmetic_mean(data)
    std_dev = statchar.std_dev_corrected(data)
    dist = distributions.NormalDistribution(0, 1)

    if number_of_tails == 2:
        # The uncovered probability is split evenly between both tails.
        alfa = (1-percentile)/2
        c = distributions.find_percentile(1-alfa, dist.cdf)
        half_width = c*std_dev/root_n
        return mean - half_width, mean + half_width

    if number_of_tails == 1:
        c = distributions.find_percentile(percentile, dist.cdf)
        SEM = statchar.std_error_of_mean(data)
        # NOTE(review): SEM is divided by sqrt(n) here; if
        # statchar.std_error_of_mean already returns std_dev/sqrt(n) this
        # divides twice -- confirm against statchar before changing.
        return mean + c*SEM/root_n

    # Unsupported number_of_tails: no interval is computed.
    return None
def confidence_interval_theoretical(n, mean, std_dev, percentile,
                                    number_of_tails, SEM=0):
    """
    Computes a confidence interval for the mean of implicit data (described
    only by its summary values) covering the probability given by the
    variable percentile.

    * n specifies the number of observations in the implicit data.
    * mean specifies the mean of the implicit data.
    * std_dev specifies the standard deviation of the implicit data; it is
      only used when number_of_tails == 2.
    * percentile gives the probability that the mean of the implicit data
      is in the confidence interval.
    * number_of_tails gives the number of tails of the normal distribution
      to be considered (1 or 2).
    * SEM gives the estimated standard error of the mean; it is only used
      when number_of_tails == 1.

    Returns the tuple (left, right) for two tails, the single upper bound
    for one tail, and None for any other number_of_tails.
    """
    root_n = math.pow(n, 0.5)
    standard_normal = distributions.NormalDistribution(0, 1)

    if number_of_tails == 2:
        # Half of the uncovered probability lies in each tail.
        tail = (1-percentile)/2
        quantile = distributions.find_percentile(1-tail, standard_normal.cdf)
        half_width = quantile*std_dev/root_n
        return mean - half_width, mean + half_width

    if number_of_tails == 1:
        quantile = distributions.find_percentile(percentile,
                                                 standard_normal.cdf)
        upper = mean + quantile*SEM/root_n
        return upper

    return None
def test_results(self):
    """
    Exercises distributions.find_percentile: checks the value found for
    0.95 on the NormalDistribution(0, 1) cdf, checks that a percentile of
    1.2 raises PercentileError, and runs a call that passes extra cdf
    arguments.
    """
    standard_normal = distributions.NormalDistribution(0, 1)
    # Built only to check that construction with the argument 7 succeeds.
    distributions.t_Distribution(7)
    quantile_95 = distributions.find_percentile(0.95, standard_normal.cdf)

    # A percentile above 1 must be rejected.
    with self.assertRaises(distributions.PercentileError):
        distributions.find_percentile(1.2, standard_normal.cdf)

    # The result is not asserted; the call only has to complete.
    normal_cdf = distributions.NormalDistribution.normal_cdf
    distributions.find_percentile(0.8, normal_cdf, (2, 5))

    self.assertTrue(1.644 < quantile_95 < 1.645)
def kolmogorov_smirnov_test(data, distribution, percentile):
    """
    Computes the Kolmogorov-Smirnov test.

    This test evaluates whether the data presented comes from the
    distribution given. The null hypothesis is that the data is from the
    given distribution.

    * data specifies the observations; a sorted copy is used internally.
    * distribution must provide a cdf attribute callable with one value.
    * percentile specifies the desired probability; it must lie strictly
      between 0 and 1.

    Returns False when sqrt(len(data)) times the test statistic exceeds the
    critical value obtained from distributions.find_percentile for the
    Kolmogorov cdf, and True otherwise.

    Raises PercentileError when percentile is not strictly between 0 and 1.

    For more information:
    https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test
    """
    if percentile >= 1 or percentile <= 0:
        raise PercentileError()

    data = sorted(data)
    F = distribution.cdf
    E = distributions.empirical_cdf
    K = distributions.KolmogorovDistribution.kolmogorov_cdf

    # Largest absolute gap between the empirical and the theoretical cdf,
    # evaluated at the sample points.
    # NOTE(review): only E(x, data) at each sample point is compared; if
    # empirical_cdf is right-continuous the supremum may also need the value
    # just left of each point -- confirm against distributions.empirical_cdf.
    statistic = max(abs(E(element, data) - F(element)) for element in data)

    root_n = math.pow(len(data), 0.5)
    critical = distributions.find_percentile(percentile, K)
    return not root_n*statistic > critical
def decision(self, percentile):
    """
    Compares self.F with the critical value that
    distributions.find_percentile returns for the given percentile, the
    Fisher cdf and the arguments [self.DFtreat, self.DFres].

    Returns "Reject null hypothesis." when self.F is greater than the
    critical value, and the acceptance message otherwise.
    """
    threshold = distributions.find_percentile(
        percentile,
        distributions.FisherDistribution.f_cdf,
        [self.DFtreat, self.DFres])
    # NOTE: the spelling of the acceptance message is kept exactly as is,
    # since callers may compare against the returned string.
    return ("Reject null hypothesis." if self.F > threshold
            else "Accept null hyphothesis.")