Esempio n. 1
0
    def simple_decision(cls, r, n, alphas):
        """Print accept/reject conclusions for the null hypothesis per alpha.

        ``r`` is converted to a t statistic using ``n - 2`` degrees of
        freedom, and its two-tailed probability is looked up once via
        ``StatTool.probability_for_t``. Then, for every entry of ``alphas``,
        two conclusions are printed: one spelled from the confidence
        interval computed for ``r`` and one spelled from the probability
        and that alpha.

        Args:
            r: Correlation coefficient under test.
            n: Presumably the sample size (``n - 2`` is used as the degrees
                of freedom) -- confirm against callers.
            alphas: Iterable of significance levels to report on.

        Returns:
            None. All results are written to stdout.
        """
        dof = n - 2

        # Convert r to t: t = r * sqrt(dof) / sqrt(1 - r^2)
        numerator = r * math.sqrt(dof)
        denominator = math.sqrt(1 - r**2)
        t_stat = numerator / denominator

        # The probability does not depend on alpha, so compute it only once.
        p_value = StatTool.probability_for_t(
            t_stat, StatTool.TWO_TAILED_TEST, dof)

        for level in alphas:
            interval = StatTool.pearson_r_confidence_interval(r, level, n)
            verdicts = (
                cls.spell_conclusion_by_ci(interval),
                cls.spell_conclusion_by_p(p_value, level),
            )
            print("For alpha=%.4f:" % level)
            for verdict in verdicts:
                print(" - %s" % verdict)
            print("")
Esempio n. 2
0
    def print_correlation(self, grp0, grp1):
        """Print a correlation and linear-regression report for two groups.

        The report contains the degrees of freedom, Pearson's r, r^2, the
        confidence interval for r at ``self.alpha``, the t statistic and its
        two-tailed probability, two textual conclusions (by confidence
        interval and by p-value), and the slope, intercept and standard
        error of the estimate of the regression of ``grp1`` on ``grp0``.

        Args:
            grp0: Group used as the x variable. The attributes ``title``,
                ``members``, ``n``, ``sd`` and ``mean`` are read.
            grp1: Group used as the y variable. The attributes ``title``,
                ``members``, ``sd`` and ``mean`` are read.

        Returns:
            None. The report is written to stdout.

        NOTE(review): when r is exactly +1 or -1 the denominator of the
        r-to-t conversion is zero; with plain Python floats that raises
        ZeroDivisionError. Confirm whether callers can hit this case.
        """
        print("%s --> %s correlation" % (grp0.title, grp1.title))
        print('-' * 70)

        # r, also called Pearson's r, is the correlation coefficient used to
        # quantify the relationship. r measures the correlation for the
        # sample:
        #
        #          cov(x, y)
        #     r = -----------
        #           Sx * Sy
        r, _ = StatTool.pearson_r(grp0.members, grp1.members)

        # r squared (r^2), the coefficient of determination:
        #   r^2 = % of variation in y explained by variation in x
        r_squared = r**2

        # Degrees of freedom. We subtract one from each sample.
        df = grp0.n - 2

        # Convert r to t
        t = (r * math.sqrt(df)) / math.sqrt(1 - r**2)

        # Calculate the probability for t
        p = StatTool.probability_for_t(t, StatTool.TWO_TAILED_TEST, df)

        # If rho is the true correlation for the population, ci is the
        # confidence interval for rho, meaning the range of likely values
        # for the population correlation coefficient.
        ci = StatTool.pearson_r_confidence_interval(r, self.alpha, grp0.n)

        conclusion1 = self.spell_conclusion_by_ci(ci)
        conclusion2 = self.spell_conclusion_by_p(p, self.alpha)

        # Slope for linear regression.
        slope = r * grp1.sd / grp0.sd

        # The regression line always goes through the mean.
        intercept = grp1.mean - slope * grp0.mean

        def calc_se_est(x, y, slope, intercept):
            # Calculate standard error of the estimate for the line
            #  y = slope * x + intercept
            # i.e. sqrt(sum of squared residuals / (n - 2)).
            ss = 0.0
            # y is indexed by position, so a y shorter than x still fails
            # loudly with IndexError instead of being silently truncated.
            for i, x_val in enumerate(x):
                ss += (y[i] - (slope * x_val + intercept))**2
            return math.sqrt(ss / float(len(x) - 2))

        # Standard error of the estimate measures the accuracy of our
        # regression line compared to the actual data.
        se_est = calc_se_est(grp0.members, grp1.members, slope, intercept)

        print("DF                       : % d" % df)
        print("Pearson r                : % .3f" % r)
        print("r^2 (coef of determ.)    : % .3f (%.2f%%)" %
              (r_squared, r_squared * 100.0))
        print("Confidence interval      : % .3f - %.3f" % ci)
        print("t-statistic              : % .3f" % t)
        print("P-value                  : % .5f" % p)
        print("Conclusion               : - %s" % conclusion1)
        print("                           - %s" % conclusion2)
        print("Linear regression:")
        print("Slope                    : % .3f" % slope)
        print("Intercept                : % .3f" % intercept)
        print("Standard err. of estimate: % .3f" % se_est)
        print("")