Example #1
0
    def run(self, observed, expected=None, conditions_list=None,
            measure='Measure', alpha=0.05):
        """
        Run a one-way chi-square analysis on a sequence of observed counts
        and store the results on self (as items and as attributes).

        args:
            observed: sequence of observed counts, one per condition

        kwds:
            expected: expected counts. Passed unchanged to
                      _stats.lchisquare; the expected counts returned by
                      that call are the ones used for everything after.
            conditions_list: labels for the conditions. When None, labels
                      are generated as 'A'..'Z', 'AA'..'ZZ', 'AAA'.. and
                      so on, one per entry of observed.
            measure: not used by this method
            alpha: significance level used for the critical chi-square
                   value and the power estimate (default 0.05)

        items set on self:
            'chisq', 'p', 'df': results of _stats.lchisquare
            'lnchisq', 'lnp', 'lndf': results of _stats.llnchisquare, or
                      the string 'nan' for each if that call raises
            'N': sum(observed)
            'w': sqrt(sum((po - pe)**2 / pe)) over the observed and
                 expected proportions (counts divided by N)
            'lambda': w**2 * N
            'crit_chi2': scipy.stats.chi2.ppf(1 - alpha, df)
            'power': 1 - ncx2cdf(crit_chi2, df, lambda)

        attributes set on self:
            observed, expected, alpha, conditions_list
        """
        chisq, prob, df, expected = _stats.lchisquare(observed, expected)

        # The log-likelihood variant is best effort: any ordinary failure
        # is reported as 'nan' strings. Catch Exception rather than using
        # a bare except so KeyboardInterrupt/SystemExit still propagate.
        try:
            lnchisq, lnprob, lndf, _ = \
                     _stats.llnchisquare(observed, expected)
        except Exception:
            lnchisq, lnprob, lndf = 'nan', 'nan', 'nan'

        self.observed = observed
        self.expected = expected
        self.alpha = alpha

        # Identity test: `== None` misbehaves for array-like arguments
        # whose equality operator is elementwise.
        if conditions_list is None:
            letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            # i -> letter i % 26 repeated (i // 26 + 1) times
            self.conditions_list = [letters[i % 26] * (i // 26 + 1)
                                    for i in _xrange(len(observed))]
        else:
            self.conditions_list = conditions_list

        self['chisq'] = chisq
        self['p'] = prob
        self['df'] = df
        self['lnchisq'] = lnchisq
        self['lnp'] = lnprob
        self['lndf'] = lndf
        self['N'] = sum(observed)

        # Convert counts to proportions of the observed total.
        total = float(self['N'])
        p_observed = [v / total for v in observed]
        p_expected = [v / total for v in expected]

        p_chisq = sum((po - pe)**2 / pe
                      for po, pe in zip(p_observed, p_expected))
        self['w'] = math.sqrt(p_chisq)
        self['lambda'] = p_chisq * self['N']
        self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
        self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])
Example #2
0
    def run(self, row_factor, col_factor, alpha=0.05):
        """
        Run a two-way chi-square analysis on the matched data in
        row_factor and col_factor and store the results on self.

        args:
            row_factor: sequence of row-level labels, one per observation
            col_factor: sequence of column-level labels, paired
                        element-by-element with row_factor

        kwds:
            alpha: significance level used for the critical chi-square
                   value and the power estimate (default 0.05)

        raises:
            ValueError: if row_factor and col_factor differ in length

        items set on self:
            'chisq', 'p', 'df': sum((o - e)**2 / e) over all cells, its
                      probability from _stats.chisqprob, and
                      (rows - 1) * (cols - 1)
            'lnchisq', 'lnp': 2 * sum(o * log(o / e)) and its
                      probability, or the string 'nan' for both if the
                      computation raises (e.g. a cell with zero count)
            'ccchisq', 'ccp': sum((|o - e| - 0.5)**2 / e) and its
                      probability for 2 x 2 tables, otherwise None
            'N': total number of observations (float)
            'CramerV', 'CramerV_prob': sqrt(chisq / (N * (k - 1))) with
                      k = min(rows, cols), and rprob of it with N - 2
            'C', 'C_prob': sqrt(chisq / (chisq + N)) and rprob of it
                      with N - 2
            'w', 'lambda', 'crit_chi2', 'power': sqrt of the chi-square
                      computed on cell proportions, that chi-square
                      times N, chi2.ppf(1 - alpha, df), and
                      1 - ncx2cdf(crit_chi2, df, lambda)

        attributes set on self:
            counter, row_counter, col_counter, N_r, N_c
        """
        if len(row_factor) != len(col_factor):
            # ValueError is a subclass of Exception, so callers that
            # catch Exception keep working.
            raise ValueError('row_factor and col_factor must be equal lengths')

        # Tally joint and marginal frequencies (as floats).
        counter = Counter()
        row_counter = Counter()
        col_counter = Counter()
        for r, c in zip(row_factor, col_factor):
            counter[(r, c)] += 1.
            row_counter[r] += 1.
            col_counter[c] += 1.

        N = float(sum(counter.values()))

        # Observed and expected tables, levels in sorted order.
        # Expected cell count = row total * column total / N.
        col_levels = sorted(col_counter)
        observed = []
        expected = []
        for r in sorted(row_counter):
            observed.append([counter[(r, c)] for c in col_levels])
            expected.append([(row_counter[r] * col_counter[c]) / N
                             for c in col_levels])

        N_r, N_c = len(row_counter), len(col_counter)
        df = (N_r - 1) * (N_c - 1)

        # Flatten each table once; every statistic below walks the cells
        # in the same order.
        obs_flat = list(_flatten(observed))
        exp_flat = list(_flatten(expected))

        chisq = sum((o - e)**2 / e for o, e in zip(obs_flat, exp_flat))
        prob = _stats.chisqprob(chisq, df)

        # Best effort: math.log raises ValueError when a cell count is 0.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            lnchisq = 2. * sum(o * math.log(o / e)
                               for o, e in zip(obs_flat, exp_flat))
            lnprob = _stats.chisqprob(lnchisq, df)
        except Exception:
            lnchisq = 'nan'
            lnprob = 'nan'

        # The 0.5 continuity correction is only computed for 2 x 2 tables.
        if N_r == N_c == 2:
            ccchisq = sum((abs(o - e) - 0.5)**2 / e
                          for o, e in zip(obs_flat, exp_flat))
            ccprob = _stats.chisqprob(ccchisq, df)
        else:
            ccchisq = None
            ccprob = None

        def rprob(r, df):
            # Transform r into a t-like statistic and evaluate
            # _stats.betai on it. TINY keeps the denominator non-zero
            # when r is exactly 1 or -1.
            TINY = 1e-30
            t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
            return _stats.betai(0.5 * df, 0.5, df / (df + t * t))

        # NOTE(review): if either factor has a single level, k - 1 is 0
        # and the division below raises ZeroDivisionError.
        k = min([N_r, N_c])
        cramerV = math.sqrt(chisq / (N * (k - 1)))
        cramerV_prob = rprob(cramerV, N - 2)
        C = math.sqrt(chisq / (chisq + N))
        C_prob = rprob(C, N - 2)

        self['chisq'] = chisq
        self['p'] = prob
        self['df'] = df
        self['lnchisq'] = lnchisq
        self['lnp'] = lnprob
        self['ccchisq'] = ccchisq
        self['ccp'] = ccprob
        self['N'] = N
        self['C'] = C
        self['CramerV'] = cramerV
        self['CramerV_prob'] = cramerV_prob
        self['C_prob'] = C_prob

        self.counter = counter
        self.row_counter = row_counter
        self.col_counter = col_counter
        self.N_r = N_r
        self.N_c = N_c

        # Same chi-square form as above, on cell proportions.
        total = float(self['N'])
        p_observed = [v / total for v in obs_flat]
        p_expected = [v / total for v in exp_flat]

        p_chisq = sum((po - pe)**2 / pe
                      for po, pe in zip(p_observed, p_expected))
        self['w'] = math.sqrt(p_chisq)
        self['lambda'] = p_chisq * self['N']
        self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
        self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])
Example #3
0
    def run(self, row_factor, col_factor, alpha=0.05):
        """
        Run a two-way chi-square analysis on the matched data in
        row_factor and col_factor, storing every result on self.

        args:
            row_factor: sequence of row-level labels, one per observation
            col_factor: sequence of column-level labels; element i is
                        paired with element i of row_factor

        kwds:
            alpha: significance level for the critical chi-square value
                   and the power estimate (default 0.05)

        raises:
            ValueError: if the two factors are not the same length

        items set on self:
            'chisq', 'p', 'df': sum((o - e)**2 / e) over the cells, its
                      probability via _stats.chisqprob, and
                      (rows - 1) * (cols - 1)
            'lnchisq', 'lnp': 2 * sum(o * log(o / e)) and its
                      probability; both are the string 'nan' if the
                      computation raises (e.g. a cell count of zero)
            'ccchisq', 'ccp': sum((|o - e| - 0.5)**2 / e) and its
                      probability when the table is 2 x 2, else None
            'N': number of observations (float)
            'CramerV', 'CramerV_prob': sqrt(chisq / (N * (k - 1))) where
                      k = min(rows, cols), and rprob of it with N - 2
            'C', 'C_prob': sqrt(chisq / (chisq + N)) and rprob of it
                      with N - 2
            'w', 'lambda', 'crit_chi2', 'power': sqrt of the chi-square
                      taken over cell proportions, that chi-square
                      times N, chi2.ppf(1 - alpha, df), and
                      1 - ncx2cdf(crit_chi2, df, lambda)

        attributes set on self:
            counter, row_counter, col_counter, N_r, N_c
        """
        if len(row_factor) != len(col_factor):
            # ValueError derives from Exception, so existing handlers
            # that catch Exception are unaffected.
            raise ValueError('row_factor and col_factor must be equal lengths')

        # Joint and marginal frequency tallies, kept as floats.
        counter = Counter()
        row_counter = Counter()
        col_counter = Counter()
        for r, c in zip(row_factor, col_factor):
            counter[(r, c)] += 1.
            row_counter[r] += 1.
            col_counter[c] += 1.

        N = float(sum(counter.values()))

        # Build the observed and expected tables with levels sorted;
        # an expected cell is row total * column total / N.
        row_levels = sorted(row_counter)
        col_levels = sorted(col_counter)
        observed = [[counter[(r, c)] for c in col_levels]
                    for r in row_levels]
        expected = [[(row_counter[r] * col_counter[c]) / N
                     for c in col_levels]
                    for r in row_levels]

        N_r, N_c = len(row_counter), len(col_counter)
        df = (N_r - 1) * (N_c - 1)

        # The tables do not change from here on, so flatten them once
        # instead of once per statistic.
        obs_flat = list(_flatten(observed))
        exp_flat = list(_flatten(expected))

        chisq = sum((o - e)**2 / e for o, e in zip(obs_flat, exp_flat))
        prob = _stats.chisqprob(chisq, df)

        # Best-effort statistic: a zero cell makes math.log raise
        # ValueError. Exception (not a bare except) is caught so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            lnchisq = 2. * sum(o * math.log(o / e)
                               for o, e in zip(obs_flat, exp_flat))
            lnprob = _stats.chisqprob(lnchisq, df)
        except Exception:
            lnchisq = 'nan'
            lnprob = 'nan'

        # Continuity-corrected statistic, 2 x 2 tables only.
        if N_r == N_c == 2:
            ccchisq = sum((abs(o - e) - 0.5)**2 / e
                          for o, e in zip(obs_flat, exp_flat))
            ccprob = _stats.chisqprob(ccchisq, df)
        else:
            ccchisq = None
            ccprob = None

        def rprob(r, df):
            # Map r to a t-like statistic, then evaluate _stats.betai.
            # TINY guards the denominator when r is exactly +/-1.
            TINY = 1e-30
            t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
            return _stats.betai(0.5 * df, 0.5, df / (df + t * t))

        # NOTE(review): with a single-level factor k - 1 is 0, so the
        # Cramer's V division raises ZeroDivisionError.
        k = min([N_r, N_c])
        cramerV = math.sqrt(chisq / (N * (k - 1)))
        cramerV_prob = rprob(cramerV, N - 2)
        C = math.sqrt(chisq / (chisq + N))
        C_prob = rprob(C, N - 2)

        self['chisq'] = chisq
        self['p'] = prob
        self['df'] = df
        self['lnchisq'] = lnchisq
        self['lnp'] = lnprob
        self['ccchisq'] = ccchisq
        self['ccp'] = ccprob
        self['N'] = N
        self['C'] = C
        self['CramerV'] = cramerV
        self['CramerV_prob'] = cramerV_prob
        self['C_prob'] = C_prob

        self.counter = counter
        self.row_counter = row_counter
        self.col_counter = col_counter
        self.N_r = N_r
        self.N_c = N_c

        # Repeat the chi-square form on proportions of the total.
        total = float(self['N'])
        p_observed = [v / total for v in obs_flat]
        p_expected = [v / total for v in exp_flat]

        p_chisq = sum((po - pe)**2 / pe
                      for po, pe in zip(p_observed, p_expected))
        self['w'] = math.sqrt(p_chisq)
        self['lambda'] = p_chisq * self['N']
        self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
        self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])