def med_func(x, y, sig, b):
    """Median (L1) objective helper for robust straight-line fitting.

    For a trial slope b, the intercept is the median of y - b*x.  The
    function returns the sum of sign(residual) * x over the points with a
    non-negligible scaled residual (the derivative of the L1 objective with
    respect to b), together with a robust scale estimate.

    x, y -- abscissa and ordinate arrays
    sig  -- per-point (or scalar) uncertainty used to scale the residuals
    b    -- trial slope

    Returns (sum, s, aa): the signed sum, the MAD-based sigma estimate,
    and the median intercept.
    """
    eps = 1.0e-8
    intercept = median(y - b * x)
    resid = y - intercept - b * x
    # MAD / 0.6745 is a consistent estimator of sigma for a Gaussian.
    scale = median(N.absolute(resid)) / 0.6745
    resid = resid / sig
    # Drop (numerically) zero residuals so sign() is well defined.
    keep = N.absolute(resid) > eps
    signs = N.compress(keep, resid)
    signs = signs / N.absolute(signs)
    total = N.sum(signs * N.compress(keep, x))
    return total, scale, intercept
def mad(xdata, xmed):
    """Return XMADM, the Median Absolute Deviation from the sample median.

    The median M is subtracted from each ordered statistic, the absolute
    values are taken, and the resulting set is re-sorted; the MAD is the
    median of that new ordered set:

        XMADM = median{ abs(x[i] - M) }

    For more information see page 408 in UREDA.

    xdata -- array of sample values
    xmed  -- their median, M
    """
    n = len(xdata)
    deviations = N.sort(N.absolute(xdata - xmed), 0)
    # Use % / // so the indices stay integral: the original
    # 'float(n)/float(n2) - int(n/n2)' parity test and 'n/n2' indexing
    # relied on Python-2 integer division and break under true division.
    if n % 2 == 0:
        # Even count: average the two central order statistics.
        hi = n // 2
        xmadm = 0.5 * (deviations[hi] + deviations[hi - 1])
    else:
        # Odd count: the single central order statistic.
        xmadm = deviations[n // 2]
    return xmadm
def biwt_func(x, y, sig, b):
    """Tukey biweight objective helper for robust straight-line fitting.

    For a trial slope b, the intercept is the median of y - b*x.  Each
    scaled residual d is weighted by the biweight function
    d * (1 - (d/c)^2)^2 and the sum of x * weight over the points with
    |d| <= c is returned, together with a robust scale estimate.

    x, y -- abscissa and ordinate arrays
    sig  -- per-point (or scalar) uncertainty used to scale the residuals
    b    -- trial slope

    Returns (sum, s, aa): the weighted sum, the MAD-based sigma estimate,
    and the median intercept.
    """
    intercept = median(y - b * x)
    resid = y - intercept - b * x
    # MAD / 0.6745 is a consistent estimator of sigma for a Gaussian.
    scale = median(N.absolute(resid)) / 0.6745
    resid = resid / sig
    c = 6.0  # standard Tukey biweight tuning constant
    weight = resid * (1 - resid ** 2 / c ** 2) ** 2
    inside = N.absolute(resid) <= c
    total = N.sum(N.compress(inside, x * weight))
    return total, scale, intercept
def runCorrelations(p, strainCount, traits, db):
    """Correlate every database entry against the user traits and
    partition the database into disjoint subarrays.

    Each database entry gets a bitmask i = sum of 2^j over the traits j
    for which |corr(entry, trait j)| >= p["threshold"]; entries with
    i == 0 are discarded.  A matched entry is stored as a 2-tuple
    (trait, kvalues), where kvalues lists the correlation values linking
    it to the matched user traits.

    The return value is a list of 4-tuples
    (i, subarray, number of 1 bits in i, len(subarray)),
    sorted in descending order first by the number of 1 bits and then by
    the subarray size; each subarray is itself sorted in descending order
    by the average magnitude of its correlation values.
    """
    threshold = p["threshold"]
    # TODO: Add Spearman support
    if p["correlation"] == "pearson":
        correlations = correlation.calcPearsonMatrix(db, traits, strainCount)  # XZ, 09/10/2008: add module name
    else:
        correlations = correlation.calcSpearmanMatrix(db, traits, strainCount)  # XZ, 09/10/2008: add module name
    # Flag, in bulk, every (entry, trait) pair whose |r| clears the threshold.
    passed = numarray.absolute(correlations)
    passed = numarray.greater_equal(passed, threshold)
    passed = passed.astype(numarray.Int8)
    buckets = {}
    for row in range(len(db)):
        mask = 0
        rvalues = []
        for col in range(len(traits)):
            if passed[row, col] == 1:
                mask += pow(2, col)
                rvalues.append(correlations[row, col])
        if mask != 0:
            buckets.setdefault(mask, []).append((db[row], rvalues))

    # Sort each bucket so the matched traits appear in descending order
    # by the average magnitude of the correlation values.
    def byMeanMagnitude(pairA, pairB):
        avgA = numarray.average(map(abs, pairA[1]))
        avgB = numarray.average(map(abs, pairB[1]))
        # invert the sign to get descending order
        return -cmp(avgA, avgB)

    for bucket in buckets.values():
        bucket.sort(byMeanMagnitude)

    # Build the outer list, remembering the 1-bit count and size per bucket.
    summary = []
    for mask in buckets.keys():
        bucket = buckets[mask]
        if len(bucket) > 0:
            summary.append((mask, bucket, len(binaryDecompose(mask)), len(bucket)))

    # Descending by number of 1 bits, then descending by bucket size.
    def bySetSizeThenCount(aL, bL):
        ordering = -cmp(aL[2], bL[2])
        if ordering == 0:
            return -cmp(aL[3], bL[3])
        return ordering

    summary.sort(bySetSizeThenCount)
    return summary
def coherence(cross_spectrum, power_spectra, noise=None, numinbin=1, subbed=1):
    """Compute coherence from cross and power spectra.

    uncoh, uncoh_err, corcoh = coherence(cross_spectrum, power_spectra,
                                         noise=None, numinbin=1, subbed=1)

    Inputs:
      cross_spectrum, power_spectra: complex and real arrays, respectively.
        power_spectra has one row for each signal band; cross_spectrum has
        one fewer, and it is assumed that the cross spectra of all bands but
        the first have been taken with respect to the first.
      noise: per-band noise power; when supplied, a noise-corrected [1]
        coherence is also returned.
      numinbin: number of independent estimates per frequency bin.
      subbed: true if noise has already been subtracted from the power
        spectra (it will then not be subtracted during the calculations).

    Outputs:
      uncoh: the raw coherence, eq. (6.51) in Bendat & Piersol 2000.
      uncoh_err: its standard deviation, eq. (9.81) in Bendat & Piersol 2000.
      corcoh: the corrected coherence, eq. (8) in Vaughan & Nowak 1997
        (only returned when noise is supplied).

    [1] Vaughan & Nowak 1997, ApJ 474:L43
    """
    cross_spectrum = num.asarray(cross_spectrum)
    power_spectra = num.asarray(power_spectra)
    # BUG FIX: 'correct' was previously bound only when noise was supplied,
    # so the default noise=None raised NameError at 'if correct:' (and
    # 'power_spectra + noise' a TypeError when subbed was true).
    correct = noise is not None
    if correct:
        noise = num.asarray(noise)
    if subbed:
        # Power spectra arrive already noise-subtracted.
        if correct:
            unsubtracted = power_spectra + noise
            subtracted = power_spectra
        else:
            unsubtracted = power_spectra
    else:
        unsubtracted = power_spectra
        if correct:
            subtracted = power_spectra - noise
    s_one = unsubtracted[..., :1, :]
    s_two = unsubtracted[..., 1:, :]
    numerator = num.absolute(cross_spectrum) ** 2
    uncorrected = numerator / (s_one * s_two)
    # Sometimes floating-point issues cause these limits to be violated.
    uncorrected = num.minimum(uncorrected, 1.0)
    uncorrected = num.maximum(uncorrected, 0.0)
    # Error in the uncorrected coherence:
    # eq. (9.81) in Bendat & Piersol 2000.
    varcoh = 2 * uncorrected * (1 - uncorrected) ** 2 / numinbin
    uncoh_error = num.sqrt(varcoh)
    if correct:
        s_one = subtracted[..., :1, :]
        s_two = subtracted[..., 1:, :]
        n_one = noise[..., :1, :]
        n_two = noise[..., 1:, :]
        # Noise bias term, eq. (8) in Vaughan & Nowak 1997.
        n_square = (s_one * n_two + n_one * s_two + n_one * n_two) / numinbin
        numerator = numerator - n_square
        corrected = numerator / (s_one * s_two)
        return uncorrected, uncoh_error, corrected
    return uncorrected, uncoh_error
def coherence(cross_spectrum, power_spectra, noise=None, numinbin=1, subbed=1):
    """Compute coherence from cross and power spectra.

    uncoh, uncoh_err, corcoh = coherence(cross_spectrum, power_spectra,
                                         noise=None, numinbin=1, subbed=1)

    Inputs:
      cross_spectrum, power_spectra: complex and real arrays, respectively.
        power_spectra has one row for each signal band; cross_spectrum has
        one fewer, and it is assumed that the cross spectra of all bands but
        the first have been taken with respect to the first.
      noise: per-band noise power; when supplied, a noise-corrected [1]
        coherence is also returned.
      numinbin: number of independent estimates per frequency bin.
      subbed: true if noise has already been subtracted from the power
        spectra (it will then not be subtracted during the calculations).

    Outputs:
      uncoh: the raw coherence, eq. (6.51) in Bendat & Piersol 2000.
      uncoh_err: its standard deviation, eq. (9.81) in Bendat & Piersol 2000.
      corcoh: the corrected coherence, eq. (8) in Vaughan & Nowak 1997
        (only returned when noise is supplied).

    [1] Vaughan & Nowak 1997, ApJ 474:L43
    """
    cross_spectrum = num.asarray(cross_spectrum)
    power_spectra = num.asarray(power_spectra)
    # BUG FIX: 'correct' was previously bound only when noise was supplied,
    # so the default noise=None raised NameError at 'if correct:' (and
    # 'power_spectra + noise' a TypeError when subbed was true).
    correct = noise is not None
    if correct:
        noise = num.asarray(noise)
    if subbed:
        # Power spectra arrive already noise-subtracted.
        if correct:
            unsubtracted = power_spectra + noise
            subtracted = power_spectra
        else:
            unsubtracted = power_spectra
    else:
        unsubtracted = power_spectra
        if correct:
            subtracted = power_spectra - noise
    s_one = unsubtracted[..., :1, :]
    s_two = unsubtracted[..., 1:, :]
    numerator = num.absolute(cross_spectrum) ** 2
    uncorrected = numerator / (s_one * s_two)
    # Sometimes floating-point issues cause these limits to be violated.
    uncorrected = num.minimum(uncorrected, 1.0)
    uncorrected = num.maximum(uncorrected, 0.0)
    # Error in the uncorrected coherence:
    # eq. (9.81) in Bendat & Piersol 2000.
    varcoh = 2 * uncorrected * (1 - uncorrected) ** 2 / numinbin
    uncoh_error = num.sqrt(varcoh)
    if correct:
        s_one = subtracted[..., :1, :]
        s_two = subtracted[..., 1:, :]
        n_one = noise[..., :1, :]
        n_two = noise[..., 1:, :]
        # Noise bias term, eq. (8) in Vaughan & Nowak 1997.
        n_square = (s_one * n_two + n_one * s_two + n_one * n_two) / numinbin
        numerator = numerator - n_square
        corrected = numerator / (s_one * s_two)
        return uncorrected, uncoh_error, corrected
    return uncorrected, uncoh_error