def run(self, observed, expected=None, conditions_list=None,
        measure='Measure', alpha=0.05):
    """
    Run a 1-way chi-square test on observed counts and store the results
    on this object.

    Args:
        observed: sequence of observed counts, one per condition.
        expected: optional sequence of expected counts. It is passed to
            _stats.lchisquare, and the ``expected`` value returned by
            that call is what is stored and used afterwards.
        conditions_list: optional labels for the conditions. When None,
            labels 'A', 'B', ..., 'Z', 'AA', 'BB', ... are generated,
            one per observed value.
        measure: not used by this method.
        alpha: significance level used for the critical chi-square value
            and the power estimate.

    Stores the keys 'chisq', 'p', 'df', 'lnchisq', 'lnp', 'lndf', 'N',
    'w', 'lambda', 'crit_chi2' and 'power' on self, and sets the
    attributes observed, expected, alpha and conditions_list.
    """
    chisq, prob, df, expected = _stats.lchisquare(observed, expected)

    # The log-likelihood variant is best-effort: on any ordinary failure
    # the string 'nan' is stored instead. Only Exception is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        lnchisq, lnprob, lndf, _ = _stats.llnchisquare(observed, expected)
    except Exception:
        lnchisq, lnprob, lndf = 'nan', 'nan', 'nan'

    self.observed = observed
    self.expected = expected
    self.alpha = alpha

    # Identity test: `== None` misbehaves for array-like arguments whose
    # __eq__ is elementwise.
    if conditions_list is None:
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        # Label i repeats its letter once more for every full pass through
        # the alphabet: 0 -> 'A', 25 -> 'Z', 26 -> 'AA', 27 -> 'BB', ...
        conditions_list = [letters[i % 26] * (i // 26 + 1)
                           for i in _xrange(len(observed))]
    self.conditions_list = conditions_list

    # Keys are assigned in the same order as before, in case the container
    # is order-sensitive.
    self['chisq'] = chisq
    self['p'] = prob
    self['df'] = df
    self['lnchisq'] = lnchisq
    self['lnp'] = lnprob
    self['lndf'] = lndf
    self['N'] = sum(observed)

    # Effect size w: root of the chi-square computed on proportions
    # (counts divided by N) rather than on raw counts.
    total = float(self['N'])
    p_chisq = sum((o / total - e / total) ** 2 / (e / total)
                  for o, e in zip(observed, expected))

    self['w'] = math.sqrt(p_chisq)
    self['lambda'] = p_chisq * self['N']
    self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
    # Power is the upper tail beyond the critical value of the distribution
    # evaluated by ncx2cdf (presumably the noncentral chi-square CDF with
    # 'lambda' as noncentrality -- confirm against its definition).
    self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])
def run(self, row_factor, col_factor, alpha=0.05):
    """
    Run a 2-way chi-square test on the matched data in row_factor and
    col_factor and store the results on this object.

    Args:
        row_factor: sequence of row-level labels, one per observation.
        col_factor: sequence of column-level labels, paired
            position-by-position with row_factor.
        alpha: significance level used for the critical chi-square value
            and the power estimate.

    Raises:
        ValueError: if row_factor and col_factor differ in length.

    Stores the keys 'chisq', 'p', 'df', 'lnchisq', 'lnp', 'ccchisq',
    'ccp', 'N', 'C', 'CramerV', 'CramerV_prob', 'C_prob', 'w', 'lambda',
    'crit_chi2' and 'power' on self, and sets the attributes counter,
    row_counter, col_counter, N_r and N_c.
    """
    if len(row_factor) != len(col_factor):
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError('row_factor and col_factor must be equal lengths')

    # Tally the joint and marginal frequencies (stored as floats).
    counter = Counter()
    row_counter = Counter()
    col_counter = Counter()
    for r, c in zip(row_factor, col_factor):
        counter[(r, c)] += 1.
        row_counter[r] += 1.
        col_counter[c] += 1.

    N = float(sum(counter.values()))

    # Build the observed and expected tables with levels in sorted order.
    # Combinations that never occurred read as 0 from the Counter.
    row_levels = sorted(row_counter)
    col_levels = sorted(col_counter)
    observed = [[counter[(r, c)] for c in col_levels]
                for r in row_levels]
    expected = [[(row_counter[r] * col_counter[c]) / N for c in col_levels]
                for r in row_levels]

    N_r, N_c = len(row_counter), len(col_counter)
    df = (N_r - 1) * (N_c - 1)

    # Pair every observed cell with its expected value once; each
    # statistic below iterates over the same pairs.
    cells = list(zip(_flatten(observed), _flatten(expected)))

    chisq = sum((o - e) ** 2 / e for o, e in cells)
    prob = _stats.chisqprob(chisq, df)

    # The log-likelihood statistic is best-effort: math.log raises
    # ValueError when an observed cell is 0, and then the string 'nan' is
    # stored. Only Exception is caught so that KeyboardInterrupt /
    # SystemExit still propagate.
    try:
        lnchisq = 2. * sum(o * math.log(o / e) for o, e in cells)
        lnprob = _stats.chisqprob(lnchisq, df)
    except Exception:
        lnchisq = 'nan'
        lnprob = 'nan'

    # The continuity-corrected statistic is only computed for 2 x 2 tables.
    if N_r == N_c == 2:
        ccchisq = sum((abs(o - e) - 0.5) ** 2 / e for o, e in cells)
        ccprob = _stats.chisqprob(ccchisq, df)
    else:
        ccchisq = None
        ccprob = None

    def rprob(r, df):
        # Turns the coefficient r into a t-like statistic and evaluates
        # _stats.betai on it; TINY guards the division when r is +/-1.
        TINY = 1e-30
        t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
        return _stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # NOTE: when either factor has a single level, k - 1 is 0 and the
    # division below raises ZeroDivisionError.
    k = min([N_r, N_c])
    cramerV = math.sqrt(chisq / (N * (k - 1)))
    cramerV_prob = rprob(cramerV, N - 2)
    C = math.sqrt(chisq / (chisq + N))
    C_prob = rprob(C, N - 2)

    # Keys are assigned in the same order as before, in case the container
    # is order-sensitive.
    self['chisq'] = chisq
    self['p'] = prob
    self['df'] = df
    self['lnchisq'] = lnchisq
    self['lnp'] = lnprob
    self['ccchisq'] = ccchisq
    self['ccp'] = ccprob
    self['N'] = N
    self['C'] = C
    self['CramerV'] = cramerV
    self['CramerV_prob'] = cramerV_prob
    self['C_prob'] = C_prob

    self.counter = counter
    self.row_counter = row_counter
    self.col_counter = col_counter
    self.N_r = N_r
    self.N_c = N_c

    # Effect size w: root of the chi-square computed on proportions
    # (cell counts divided by N) rather than on raw counts.
    total = float(self['N'])
    p_chisq = sum((o / total - e / total) ** 2 / (e / total)
                  for o, e in cells)

    self['w'] = math.sqrt(p_chisq)
    self['lambda'] = p_chisq * self['N']
    self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
    # Power is the upper tail beyond the critical value of the distribution
    # evaluated by ncx2cdf (presumably the noncentral chi-square CDF with
    # 'lambda' as noncentrality -- confirm against its definition).
    self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])
def run(self, row_factor, col_factor, alpha=0.05):
    """
    Run a 2-way chi-square test on the matched data in row_factor and
    col_factor and store the results on this object.

    Args:
        row_factor: sequence of row-level labels, one per observation.
        col_factor: sequence of column-level labels, paired
            position-by-position with row_factor.
        alpha: significance level used for the critical chi-square value
            and the power estimate.

    Raises:
        ValueError: if row_factor and col_factor differ in length.

    Stores the keys 'chisq', 'p', 'df', 'lnchisq', 'lnp', 'ccchisq',
    'ccp', 'N', 'C', 'CramerV', 'CramerV_prob', 'C_prob', 'w', 'lambda',
    'crit_chi2' and 'power' on self, and sets the attributes counter,
    row_counter, col_counter, N_r and N_c.
    """
    if len(row_factor) != len(col_factor):
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError('row_factor and col_factor must be equal lengths')

    # Tally the joint and marginal frequencies (stored as floats).
    counter = Counter()
    row_counter = Counter()
    col_counter = Counter()
    for r, c in zip(row_factor, col_factor):
        counter[(r, c)] += 1.
        row_counter[r] += 1.
        col_counter[c] += 1.

    N = float(sum(counter.values()))

    # Build the observed and expected tables with levels in sorted order.
    # Combinations that never occurred read as 0 from the Counter.
    row_levels = sorted(row_counter)
    col_levels = sorted(col_counter)
    observed = [[counter[(r, c)] for c in col_levels]
                for r in row_levels]
    expected = [[(row_counter[r] * col_counter[c]) / N for c in col_levels]
                for r in row_levels]

    N_r, N_c = len(row_counter), len(col_counter)
    df = (N_r - 1) * (N_c - 1)

    # Pair every observed cell with its expected value once; each
    # statistic below iterates over the same pairs.
    cells = list(zip(_flatten(observed), _flatten(expected)))

    chisq = sum((o - e) ** 2 / e for o, e in cells)
    prob = _stats.chisqprob(chisq, df)

    # The log-likelihood statistic is best-effort: math.log raises
    # ValueError when an observed cell is 0, and then the string 'nan' is
    # stored. Only Exception is caught so that KeyboardInterrupt /
    # SystemExit still propagate.
    try:
        lnchisq = 2. * sum(o * math.log(o / e) for o, e in cells)
        lnprob = _stats.chisqprob(lnchisq, df)
    except Exception:
        lnchisq = 'nan'
        lnprob = 'nan'

    # The continuity-corrected statistic is only computed for 2 x 2 tables.
    if N_r == N_c == 2:
        ccchisq = sum((abs(o - e) - 0.5) ** 2 / e for o, e in cells)
        ccprob = _stats.chisqprob(ccchisq, df)
    else:
        ccchisq = None
        ccprob = None

    def rprob(r, df):
        # Turns the coefficient r into a t-like statistic and evaluates
        # _stats.betai on it; TINY guards the division when r is +/-1.
        TINY = 1e-30
        t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
        return _stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # NOTE: when either factor has a single level, k - 1 is 0 and the
    # division below raises ZeroDivisionError.
    k = min([N_r, N_c])
    cramerV = math.sqrt(chisq / (N * (k - 1)))
    cramerV_prob = rprob(cramerV, N - 2)
    C = math.sqrt(chisq / (chisq + N))
    C_prob = rprob(C, N - 2)

    # Keys are assigned in the same order as before, in case the container
    # is order-sensitive.
    self['chisq'] = chisq
    self['p'] = prob
    self['df'] = df
    self['lnchisq'] = lnchisq
    self['lnp'] = lnprob
    self['ccchisq'] = ccchisq
    self['ccp'] = ccprob
    self['N'] = N
    self['C'] = C
    self['CramerV'] = cramerV
    self['CramerV_prob'] = cramerV_prob
    self['C_prob'] = C_prob

    self.counter = counter
    self.row_counter = row_counter
    self.col_counter = col_counter
    self.N_r = N_r
    self.N_c = N_c

    # Effect size w: root of the chi-square computed on proportions
    # (cell counts divided by N) rather than on raw counts.
    total = float(self['N'])
    p_chisq = sum((o / total - e / total) ** 2 / (e / total)
                  for o, e in cells)

    self['w'] = math.sqrt(p_chisq)
    self['lambda'] = p_chisq * self['N']
    self['crit_chi2'] = scipy.stats.chi2.ppf((1. - alpha), df)
    # Power is the upper tail beyond the critical value of the distribution
    # evaluated by ncx2cdf (presumably the noncentral chi-square CDF with
    # 'lambda' as noncentrality -- confirm against its definition).
    self['power'] = 1. - ncx2cdf(self['crit_chi2'], df, self['lambda'])