def _update_parameters(self):
    """Recompute the cached statistics after ``self.members`` has changed.

    Stores the sample count in ``n``, the mean in both ``mean`` and
    ``orig_mean``, and the standard deviation in both ``sd`` and
    ``orig_sd``, then prints a one-line summary of the new values.
    """
    self.n = len(self.members)

    # The "orig_" copies receive the same freshly computed value.
    fresh_mean = StatTool.calc_mean(self.members)
    self.mean = fresh_mean
    self.orig_mean = fresh_mean

    fresh_sd = StatTool.calc_sd(self.members, self.is_population)
    self.sd = fresh_sd
    self.orig_sd = fresh_sd

    summary = "Got %d samples, mean: %.3f, sd: %.3f" % (self.n, self.mean, self.sd)
    print(summary)
def simple_decision(cls, r, n, alphas):
    """Print, for every alpha, the conclusions about the null hypothesis.

    ``r`` is converted to a t-statistic using ``n - 2`` degrees of freedom
    and its two-tailed probability is looked up once. For each value in
    ``alphas`` two conclusions are then printed: one derived from the
    confidence interval of ``r`` and one from comparing the probability
    against that alpha.
    """
    dof = n - 2
    t_stat = r * math.sqrt(dof) / math.sqrt(1 - r**2)
    prob = StatTool.probability_for_t(t_stat, StatTool.TWO_TAILED_TEST, dof)

    for alpha in alphas:
        interval = StatTool.pearson_r_confidence_interval(r, alpha, n)
        # Both conclusions are worked out before anything is printed.
        verdicts = (
            cls.spell_conclusion_by_ci(interval),
            cls.spell_conclusion_by_p(prob, alpha),
        )
        print("For alpha=%.4f:" % alpha)
        for verdict in verdicts:
            print(" - %s" % verdict)
        print("")
def _fix_samples(self):
    """Adjust the sample statistics when the two samples count as dependent.

    The samples are treated as dependent when neither is flagged as a
    population; otherwise nothing is changed.

    * If both samples carry members, sample-1's mean and sd are replaced by
      those of the element-wise differences (sample-1 minus sample-0), and
      sample-0's mean and sd are reset to ``0.0`` and ``None``.
    * Otherwise, if both samples have an ``orig_sd``, only sample-1's sd is
      replaced, by ``self.standard_deviation_difference()``.
    """
    first, second = self.samp0, self.samp1

    if first.is_population or second.is_population:
        return

    if first.members and second.members:
        # Indexing sample-0 by sample-1's positions pairs the observations.
        diffs = [value - first.members[idx]
                 for idx, value in enumerate(second.members)]

        first.mean = 0.0
        first.sd = None
        first.notes = "mean and sd have been reset"

        second.mean = StatTool.calc_mean(diffs)
        second.sd = StatTool.calc_sd(diffs, second.is_population)
        second.notes = "mean and sd are difference from sample-0"
    elif first.orig_sd is not None and second.orig_sd is not None:
        second.sd = self.standard_deviation_difference()
        second.notes = "sd is difference from sample-0"
def margin_of_error(self):
    """Return the margin of error, i.e. half the width of the confidence
    interval (CI).

    The margin of error is the distance from μ, in either direction and in
    the same unit as the original sample, that still lies inside the CI. It
    is computed as the two-tailed z-critical value for ``self.alpha``
    multiplied by ``self.SEM()``.

    Example: with μ = $50, alpha = 5% and a standard error of $15, the 95%
    confidence interval runs from $50 - (1.96 * $15) to $50 + (1.96 * $15);
    the margin of error is (1.96 * $15).
    """
    z_two_tailed = StatTool.z_critical_value(self.alpha, StatTool.TWO_TAILED_TEST)
    standard_error = self.SEM()
    return z_two_tailed * standard_error
def simple_calculation(cls, k, n, f_ratio, alphas):
    """Print the decision for a given F-ratio at each requested alpha.

    Args:
        k: used for the first degrees of freedom, ``df_n = k - 1``.
        n: combined with ``k`` for the second, ``df_d = (k * n) - k``.
        f_ratio: the F value to judge; a negative value only prints a
            message and stops.
        alphas: iterable of alpha levels; one result line is printed each.

    Returns:
        None. Everything is reported on stdout.
    """
    if f_ratio < 0:
        print("Cannot make decision because F is negative")
        return

    df_n = k - 1
    df_d = (k * n) - k

    print("df_n: %d" % df_n)
    print("df_d: %d" % df_d)
    print("F-ratio: % .3f" % f_ratio)

    for alpha in alphas:
        threshold = StatTool.f_critical_value(alpha, df_n, df_d)
        # H0 is rejected when F reaches the critical value.
        if f_ratio >= threshold:
            verdict = "reject H0"
        else:
            verdict = "accept H0"
        print("alpha %.3f: critical: %.3f conclusion: %s" % (alpha, threshold, verdict))
def _calc(self):
    """Return q* times the square root of the mean of ``msw / n`` over groups.

    ``q*`` is ``StatTool.q_value`` for ``self.alpha`` and the two degrees of
    freedom (``self.df_n()``, ``self.df_d()``); ``msw`` is
    ``self.mean_squares_within()``; ``n`` is each group's ``n`` attribute.
    """
    q_star = StatTool.q_value(self.alpha, self.df_n(), self.df_d())
    within = self.mean_squares_within()
    group_count = len(self.groups)

    mean_term = sum(within / group.n for group in self.groups) / group_count
    return q_star * math.sqrt(mean_term)
def _calc(self):
    """Return ``StatTool.probability_for_f`` for this object's score and its
    two degrees of freedom (``df_n()`` and ``df_d()``)."""
    f_score = self.score()
    dof_n = self.df_n()
    dof_d = self.df_d()
    return StatTool.probability_for_f(f_score, dof_n, dof_d)
def _calc(self):
    """Return the F-critical value for ``self.alpha`` given the between and
    within degrees of freedom (``df_between()`` and ``df_within()``)."""
    dof_between = self.df_between()
    dof_within = self.df_within()
    return StatTool.f_critical_value(self.alpha, dof_between, dof_within)
def p_value(self):
    """Return the probability (as a proportion) of this result's t-statistic.

    Looked up with ``StatTool.probability_for_t`` using the test direction
    ``self.dir`` and the degrees of freedom ``self.df()``.
    """
    t_stat = self.t_statistics()
    dof = self.df()
    return StatTool.probability_for_t(t_stat, self.dir, dof)
def critical(self):
    """Return the t-critical value for ``self.alpha``, ``self.dir`` and
    ``self.df()``.

    Note: the value may be negative!
    """
    dof = self.df()
    t_critical = StatTool.t_critical_value(self.alpha, self.dir, dof)
    return t_critical
def p_value(self):
    """Return the probability (as a proportion) of this result's z-score.

    Looked up with ``StatTool.probability_for_z`` using the test direction
    ``self.dir``.
    """
    z_score = self.score()
    return StatTool.probability_for_z(z_score, self.dir)
def critical(self):
    """Return the z-critical value for ``self.alpha`` in the direction
    ``self.dir``."""
    z_critical = StatTool.z_critical_value(self.alpha, self.dir)
    return z_critical
def spell_type_of_test(self):
    """Return the spelled-out type of this test, i.e. whatever
    ``StatTool.spell_directionality`` yields for ``self.dir``."""
    direction = self.dir
    return StatTool.spell_directionality(direction)
def print_correlation(self, grp0, grp1):
    """Print a correlation and linear-regression report for two groups.

    ``grp0`` supplies the x values and ``grp1`` the y values. The method
    reads each group's ``title``, ``members``, ``mean`` and ``sd`` (and
    ``grp0.n``), and ``self.alpha`` for the confidence interval and the
    p-value conclusion.

    Printed, in order: degrees of freedom, Pearson's r, r^2, the confidence
    interval for r, the t-statistic and its two-tailed probability, two
    conclusions (by confidence interval and by p-value), and the regression
    slope, intercept and standard error of the estimate.

    Returns:
        None. Everything is reported on stdout.
    """
    print("%s --> %s correlation" % (grp0.title, grp1.title))
    print('-' * 70)

    # r, also called Pearson's r, is the correlation coefficient that
    # quantifies the relationship. It measures the correlation for the
    # sample:
    #
    #          cov(x, y)
    #     r = -----------
    #           Sx * Sy
    r, _ = StatTool.pearson_r(grp0.members, grp1.members)

    # r squared (r^2) is the coefficient of determination: the percentage
    # of the variation in y explained by the variation in x.
    r_squared = r**2

    # Degrees of freedom: we subtract one from each sample.
    df = grp0.n - 2

    # Convert r to t, then calculate the probability for t.
    t = (r * math.sqrt(df)) / math.sqrt(1 - r**2)
    p = StatTool.probability_for_t(t, StatTool.TWO_TAILED_TEST, df)

    # If rho is the true correlation for the population, the CI is the
    # range of likely values for that population coefficient.
    ci = StatTool.pearson_r_confidence_interval(r, self.alpha, grp0.n)

    conclusion1 = self.spell_conclusion_by_ci(ci)
    conclusion2 = self.spell_conclusion_by_p(p, self.alpha)

    # Slope for the linear regression. The regression line always goes
    # through the means, which determines the intercept.
    slope = r * grp1.sd / grp0.sd
    intercept = grp1.mean - slope * grp0.mean

    def calc_se_est(x, y, slope, intercept):
        # Standard error of the estimate for y = slope * x + intercept.
        # An explicit running total keeps the float accumulation order
        # exactly as a plain loop (built-in sum() may compensate).
        ss = 0.0
        for i, x_i in enumerate(x):
            ss += (y[i] - (slope * x_i + intercept))**2
        return math.sqrt(ss / float(len(x) - 2))

    # The standard error of the estimate measures the accuracy of the
    # regression line compared to the actual data.
    se_est = calc_se_est(grp0.members, grp1.members, slope, intercept)

    print("DF : % d" % df)
    print("Pearson r : % .3f" % r)
    print("r^2 (coef of determ.) : % .3f (%.2f%%)" % (r_squared, r_squared * 100.0))
    print("Confidence interval : % .3f - %.3f" % ci)
    print("t-statistic : % .3f" % t)
    print("P-value : % .5f" % p)
    print("Conclusion : - %s" % conclusion1)
    print(" - %s" % conclusion2)
    print("Linear regression:")
    print("Slope : % .3f" % slope)
    print("Intercept : % .3f" % intercept)
    print("Standard err. of estimate: % .3f" % se_est)
    print("")