def safe_sum_p_log_p(a, base=None):
    """Returns the sum of p * log(p) over the nonzero elements of a.

    a: array (any shape); it is flattened before use. Zero entries are
       dropped before the logarithm is taken, so they contribute nothing
       to the total instead of producing log(0).
    base: optional logarithm base. When it is omitted (or otherwise false)
       the natural logarithm is used.
    """
    values = ravel(a)
    present = take(values, nonzero(values))
    log_present = log(present)
    if base:
        #change of base: log_b(p) = ln(p) / ln(b)
        log_present = log_present / log(base)
    return sum(present * log_present)
def G_fit(obs, exp, williams=1):
    """G test for fit between two lists of counts.

    Usage: test, prob = G_fit(obs, exp, williams)

    obs and exp are two lists of numbers of equal length.
    williams is a boolean stating whether to do the Williams correction.

    Computes G = 2 * SUM(obs * ln(obs / exp)); observed counts of zero
    contribute nothing to the sum.

    Returns the tuple (G, chi_high(G, k - 1)), where k is the number of
    categories.

    Raises ValueError if the lists differ in length or any observed value
    is negative, and ZeroExpectedError if any expected value is <= 0.

    See Sokal and Rohlf chapter 17.
    """
    k = len(obs)
    if k != len(exp):
        raise ValueError("G_fit requires two lists of equal length.")
    G = 0
    n = 0

    for o, e in zip(obs, exp):
        if o < 0:
            raise ValueError(
                "G_fit requires all observed values to be positive.")
        if e <= 0:
            raise ZeroExpectedError(
                "G_fit requires all expected values to be positive.")
        if o:   #if o is zero, o * log(o/e) must be zero as well.
            #float() keeps the ratio exact for integer counts even when
            #true division is not enabled for this module
            G += o * log(float(o) / e)
            n += o

    G *= 2
    #skip the correction when nothing was observed: G is already 0 and the
    #correction factor would divide by zero
    if williams and n:
        q = 1 + (k + 1) / (6.0 * n)
        G /= q

    return G, chi_high(G, k - 1)
def log2(x):
    """Returns the log (base 2) of x.

    Computed as log(x) divided by the module-level constant ln_2.

    WARNING: log2(0) will give -inf on one platform, but it might raise an
    error (Overflow or ZeroDivision) on another platform. So don't rely on
    getting -inf in your downstream code.
    """
    natural_log = log(x)
    return natural_log / ln_2
def G_ind(m, williams=False):
    """Returns G test for independence in an r x c table.

    m: the table of counts. The original documentation requires a Numeric
       array; sum(m) and sum(m, 1) are used for the two sets of marginal
       totals.
    williams: if true, G is divided by the Williams correction factor.

    Returns the tuple (G, chi_high(max(G, 0), df)), where
    df = (rows - 1) * (columns - 1).

    From Sokal and Rohlf p 738.
    """
    #sum of f ln f for the cells, for each set of marginal totals, and for
    #the grand total
    cells_term = safe_sum_p_log_p(m)
    axis0_totals = sum(m)
    axis0_term = safe_sum_p_log_p(axis0_totals)
    axis1_totals = sum(m, 1)
    axis1_term = safe_sum_p_log_p(axis1_totals)
    tot = sum(ravel(m))
    table_term = tot * log(tot)

    df = (len(m) - 1) * (len(m[0]) - 1)
    G = 2 * (cells_term - axis0_term - axis1_term + table_term)
    if williams:
        axis1_factor = tot * sum(1.0 / axis1_totals) - 1
        axis0_factor = tot * sum(1.0 / axis0_totals) - 1
        q = 1 + (axis1_factor * axis0_factor / (6 * tot * df))
        G = G / q
    #rounding can leave G slightly below zero, so clamp before the lookup
    return G, chi_high(max(G, 0), df)
def reindex(self):
    """Append the next smoothed value to self.inter and the next
    transformed value to self.

    The latest entry of self.series is located within the min/max range of
    the last self.periods entries, blended with the previous self.inter
    value, clamped to [-0.99, 0.99], and passed through
    0.5 * log((1 + x) / (1 - x)) before being blended with the previous
    entry of self.

    If the calculation raises TypeError, IndexError or ZeroDivisionError
    (for example no previous value yet, or a flat window), both appended
    values are 0.
    """
    window = self.series[-self.periods:]
    latest = window[-1]
    highest = max(window)
    lowest = min(window)
    try:
        # Position of the latest value inside the window's range, re-centred
        # so that the midpoint of the range maps to zero.
        position = (latest - lowest) / (highest - lowest) - 0.5
        inter = 0.33 * 2 * position + (0.67 * self.inter[-1])
        # Keep the value strictly inside (-1, 1) so the log below is finite.
        if inter > 0.99:
            inter = 0.99
        elif inter < -0.99:
            inter = -0.99
        odds = (1 + inter) / (1 - inter)
        fish = 0.5 * log(odds) + (0.5 * self[-1])
    except (TypeError, IndexError, ZeroDivisionError):
        inter = 0
        fish = 0
    self.inter.append(inter)
    self.append(fish)
def fn(x):
    """Return x * log2(x), with 0 returned for x == 0.

    The zero case is handled explicitly so that log(0) is never evaluated.
    """
    if x == 0:
        return 0
    log_base_2 = log(x) / log(2)
    return x * log_base_2
def G_2_by_2(a, b, c, d, williams=1, directional=1):
    """G test for independence in a 2 x 2 table.

    Usage: G, prob = G_2_by_2(a, b, c, d, williams, directional)

    Cells are in the order:

        a b
        c d

    a, b, c, and d are non-negative counts (int or float).
    williams is a boolean stating whether to do the Williams correction.
    directional is a boolean stating whether the test is 1-tailed.

    Briefly, computes sum(f ln f) for cells - sum(f ln f) for rows and
    columns + f ln f for the table, and doubles the result.

    Always has 1 degree of freedom.

    To generalize the test to r x c, use the same protocol:
    2*(cells - rows/cols + table), then with (r-1)(c-1) df.

    Note that G is always positive: to get a directional test, the
    appropriate ratio (e.g. a/b > c/d) must be tested as a separate
    procedure. Find the probability for the observed G, and then either
    halve or halve and subtract from one depending on whether the
    directional prediction was upheld. When the test is directional and
    the prediction was not upheld, the returned G is negated.

    The default test is one-tailed.

    Returns (0, 1) if the table is empty or an entire row or column is
    missing. Raises ValueError if any cell count is negative.

    See Sokal & Rohlf (1995), ch. 17. Specifically, see box 17.6 (p731).
    """
    cells = [a, b, c, d]
    #raise error if any counts were negative. This is checked before the
    #empty-table shortcut so that tables such as (1, -1, 0, 0), whose
    #total happens to be zero, are rejected rather than treated as empty.
    if min(cells) < 0:
        raise ValueError(
            "G_2_by_2 got negative cell counts(s): must all be >= 0.")
    n = sum(cells)
    #return 0 if table was empty
    if not n:
        return (0, 1)

    #Find totals for rows and cols
    ab = a + b
    cd = c + d
    ac = a + c
    bd = b + d
    rows_cols = [ab, cd, ac, bd]
    #exit if we are missing a row or column entirely: result counts as
    #never significant
    if min(rows_cols) == 0:
        return (0, 1)

    G = 0
    #Add x ln x for items, adding zero for items whose counts are zero
    for i in filter(None, cells):
        G += i * log(i)
    #Subtract x ln x for rows and cols (all nonzero at this point)
    for i in rows_cols:
        G -= i * log(i)
    #Add x ln x for table
    G += n * log(n)
    #Result needs to be multiplied by 2
    G *= 2

    #apply Williams correction
    if williams:
        #float total so the ratios are not truncated for integer counts
        total = float(n)
        q = 1 + ((((total / ab) + (total / cd)) - 1) *
                 (((total / ac) + (total / bd)) - 1)) / (6 * total)
        G /= q

    p = chi_high(max(G, 0), 1)

    #find which tail we were in if the test was directional
    if directional:
        is_high = ((b == 0) or
                   (d != 0 and (float(a) / b > float(c) / d)))
        p = tail(p, is_high)
        if not is_high:
            G = -G
    return G, p
def logImage(self):
    """Build an image from the logarithm of self.data.

    Does nothing when self.fname is the empty string. Otherwise applies
    Numeric's log to self.data and hands the result to self.createImage.
    """
    if self.fname != '':
        from Numeric import log
        self.createImage(log(self.data))
idx_pairs = take(transform, data[:,0:2].astype(Int32)) else: idx_pairs = data[:,0:2].astype(Int32) #figure out biggest item if not supplied if num_items is None: num_items = int(max(ravel(idx_pairs))) + 1 #make result array result = zeros((num_items,num_items), Float64) if len(data[0]) == 2: values = 1 else: values = data[:,2] put(ravel(result), idx_pairs[:,0]*num_items+idx_pairs[:,1], values) return result ln_2 = log(2) def log2(x): """Returns the log (base 2) of x" WARNING: log2(0) will give -inf on one platform, but it might raise an error (Overflow or ZeroDivision on another platform. So don't rely on getting -inf in your downstream code. """ return log(x)/ln_2 def safe_p_log_p(a): """Returns -(p*log2(p)) for every nonzero p in a. a: Numeric array
def exct(iline, upper, lower):
    """Return (-hp * frq[iline] / kb) / log(upper / lower * glo[iline] / gup[iline]).

    iline: index into the module-level sequences frq, glo and gup.
    upper, lower: the two quantities whose ratio is weighted by
        glo[iline] / gup[iline] before the logarithm is taken.

    NOTE(review): hp and kb are presumably the Planck and Boltzmann
    constants, which would make this an excitation temperature from level
    populations and statistical weights -- confirm against the module
    constants.
    """
    numerator = -1.0 * hp * frq[iline] / kb
    weighted_ratio = upper / lower * glo[iline] / gup[iline]
    return numerator / log(weighted_ratio)