예제 #1
0
def med_func(x, y, sig, b):
    """Median (L1) estimating function for robust straight-line fitting.

    Given data arrays ``x``, ``y``, per-point uncertainties ``sig`` and a
    trial slope ``b``, returns a 3-tuple:

    - the estimating-equation sum  sum_i sign(resid_i) * x_i  over points
      whose scaled residual is not numerically zero,
    - a robust scale estimate (MAD / 0.6745, ~sigma for Gaussian noise),
    - the intercept estimate (median residual at slope ``b``).
    """
    eps = 1.0e-8
    # Intercept: median of (y - b*x).
    intercept = median(y - b * x)
    resid = y - intercept - b * x
    # Robust scale from the median absolute deviation.
    scale = median(N.absolute(resid)) / 0.6745
    resid = resid / sig
    # Drop residuals that are numerically zero so sign() is well defined.
    keep = N.absolute(resid) > eps
    signs = N.compress(keep, resid)
    signs = signs / N.absolute(signs)
    kept_x = N.compress(keep, x)
    total = N.sum(signs * kept_x)
    return total, scale, intercept
예제 #2
0
def mad(xdata, xmed):
    """Return the Median Absolute Deviation of *xdata* about *xmed*.

    The median M (passed as ``xmed``) is subtracted from each value, the
    absolute values are taken and sorted, and the median of that new set
    of order statistics is returned:

        XMADM = median{ abs(x[i] - M) }

    For an even sample size the two central order statistics are
    averaged.  For more information see page 408 in UREDA.
    """
    n = len(xdata)
    devs = N.sort(N.absolute(xdata - xmed), 0)
    # Bug fix: indices must use integer (floor) division -- under
    # Python 3, n/2 is a float and cannot index the array.  The old
    # parity test float(n)/float(2) - int(n/2) == 0 is simply n % 2 == 0.
    half = n // 2
    if n % 2 == 0:
        # Even n: average the two middle order statistics.
        xmadm = 0.5 * (devs[half] + devs[half - 1])
    else:
        xmadm = devs[half]
    return xmadm
예제 #3
0
def biwt_func(x, y, sig, b):
    """Tukey-biweight estimating function for robust straight-line fitting.

    Given data arrays ``x``, ``y``, per-point uncertainties ``sig`` and a
    trial slope ``b``, returns a 3-tuple:

    - the estimating-equation sum  sum_i x_i * psi(resid_i)  using the
      biweight psi with tuning constant c = 6 (points with |resid| > c
      are excluded),
    - a robust scale estimate (MAD / 0.6745, ~sigma for Gaussian noise),
    - the intercept estimate (median residual at slope ``b``).

    NOTE(review): the original carried a "Problems?!?" remark and
    commented-out Lorentzian/MAD alternatives; the biweight form is kept.
    """
    # Intercept: median of (y - b*x).
    intercept = median(y - b * x)
    resid = y - intercept - b * x
    # Robust scale from the median absolute deviation.
    scale = median(N.absolute(resid)) / 0.6745
    resid = resid / sig
    # Biweight influence function, tuning constant c = 6.0.
    c = 6.0
    psi = resid * (1 - resid ** 2 / c ** 2) ** 2
    total = N.sum(N.compress(N.absolute(resid) <= c, x * psi))
    return total, scale, intercept
예제 #4
0
def runCorrelations(p, strainCount, traits, db):
    """
    To run the correlations between the traits and the database.
    This function computes a correlation coefficent between each
    trait and every entry in the database, and partitions the database
    into a disjoint array of arrays which it returns.

    The length of the return array is 2^n, where n is the length of
    the trait array. Which constitutent element a of the return array
    a given trait ends up in is determined by the following formula
    i = i_02^0 + ... + i_(n-1)2^(n-1)
    where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise

    Since most of the several thousand database traits will end up
    with i=0, we don't return them, so the first element of the
    return array will be empty.

    A particular element of subarray j of the return array contains
    a 2-tuple (trait,kvalues). The variable trait is obviously the
    particular database trait that matches the user traits l_1, ..., l_m
    to which subarray j corresponds. kvalues is a list of the correlation
    values linking trait to l_1, ..., l_m, so the length of kvalues is
    the number of 1s in the binary representation of j (there must be
    a better way to describe this length).

    The return array is an array of 2-tuples. The first element of
    each tuple is the index of the particular subarray, and the second
    element is the subarray itself. The array is sorted in descending
    order by the number of 1's in the binary representation of the
    index so the first few subarrays are the ones that correspond to
    the largest sets. Each subarray is then sorted by the average of
    the magnitude of the individual correlation values.
    """
    # NOTE(review): Python 2 idioms (has_key, cmp-based sort) and the
    # pre-NumPy numarray package -- this function predates Python 3.
    # p: parameter dict; reads "threshold" (minimum |correlation|) and
    # "correlation" (selects Pearson; anything else falls to Spearman).
    kMin = p["threshold"]
    # traitArrays maps bitmask index -> list of (db trait, correlation values).
    traitArrays = {}

    # TODO: Add Spearman support
    if p["correlation"] == "pearson":
        correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
    else:
        correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name

    # now we test all of the correlations in bulk
    # test[i,j] == 1 iff |corr(db entry i, trait j)| >= threshold
    test = numarray.absolute(correlations) 
    test = numarray.greater_equal(test, kMin)
    test = test.astype(numarray.Int8)
    #print test
    

    # Partition the database entries: cIndex is the bitmask from the
    # docstring (bit j set when trait j passed the threshold) and prods
    # collects the corresponding correlation values.
    for i in range(len(db)):
        cIndex = 0
        prods = []
        for j in range(len(traits)):
            if test[i,j] == 1:
                cIndex += pow(2, j)
                prods.append(correlations[i,j])
        # cIndex == 0 means no trait matched; such entries are dropped.
        if cIndex != 0:
            if not traitArrays.has_key(cIndex):
                traitArrays[cIndex] = []

            traitArrays[cIndex].append((db[i], prods))
                
    # sort each inner list of traitArrays
    # so the matched traits appear in descending order by the
    # average magnitude of the correlation
    def customCmp(traitPair, traitPair2):
        # traitPair[1] is the list of correlation values (prods above).
        magAvg1 = numarray.average(map(abs, traitPair[1]))
        magAvg2 = numarray.average(map(abs, traitPair2[1]))

        # invert the sign to get descending order
        return -cmp(magAvg1, magAvg2)
    
    for traitArray in traitArrays.values():
        traitArray.sort(customCmp)

    # sort the outer list of traitArrays
    # Each element is (bitmask, subarray, popcount of bitmask, subarray size);
    # the last two fields exist only as sort keys for customCmp2 below.
    traitArrays2 = []
    i = 0
    for key in traitArrays.keys():
        a = traitArrays[key]
        if len(a) > 0:
            traitArrays2.append((key,a,len(binaryDecompose(key)),
                                 len(a)))

    # we sort by the number of 1's in the binary output
    # and then by the size of the list, both in descending order
    def customCmp2(aL,bL):
        a = -cmp(aL[2], bL[2])
        if a == 0:
            return -cmp(aL[3], bL[3])
        else:
            return a

    traitArrays2.sort(customCmp2)

    return traitArrays2
예제 #5
0
def coherence(cross_spectrum, power_spectra, noise=None, numinbin=1, subbed=1):
    """Compute coherence from cross and power spectra.

    uncoh, uncoh_err, corcoh = coherence(cross_spectrum, power_spectra,
                                         noise=None, numinbin=1, subbed=1)

    Inputs:

    cross_spectrum, power_spectra: Complex and real arrays, respectively.
    power_spectra has one row for each signal band.
    cross_spectrum has one fewer, and it is assumed that the cross spectra
    of all bands but the first have been taken with respect to the first.

    Will correct[1] for noise if it is supplied.

    numinbin: The number of independent estimates that have gone into
    each frequency bin.

    If subbed is true, noise has already been subtracted from
    the power spectra, and will not be during the calculations.

    Outputs:

    uncoh: The raw coherence.  Eq. (6.51) in Bendat & Piersol 2000.

    uncoh_err:  Standard deviation in the raw coherence.
    Eq. (9.81) in Bendat & Piersol 2000.

    corcoh: The corrected coherence.  Eq. (8) in Vaughan & Nowak 1997.
    Only returned when noise is supplied.

    [1] Vaughan & Nowak 1997, ApJ 474:L43"""

    cross_spectrum = num.asarray(cross_spectrum)
    power_spectra = num.asarray(power_spectra)

    # Bug fix: 'correct' was previously left unbound when noise was None,
    # so the references below raised UnboundLocalError (and, with subbed
    # true, 'power_spectra + None' raised TypeError).  Noise correction
    # applies only when noise spectra are actually supplied.
    correct = 0
    if noise is not None:
        correct = 1
        noise = num.asarray(noise)

    if subbed:
        if correct:
            # Noise already subtracted; add it back for the raw powers.
            unsubtracted = power_spectra + noise
            subtracted = power_spectra
        else:
            unsubtracted = power_spectra
    else:
        unsubtracted = power_spectra
        if correct:
            subtracted = power_spectra - noise

    # Row 0 is the reference band; remaining rows are the other bands.
    s_one = unsubtracted[..., :1, :]
    s_two = unsubtracted[..., 1:, :]
    numerator = num.absolute(cross_spectrum)**2
    uncorrected = numerator / (s_one * s_two)
    # Sometimes floating-point issues cause these limits to be violated.
    uncorrected = num.minimum(uncorrected, 1.0)
    uncorrected = num.maximum(uncorrected, 0.0)

    # error in uncorrected coherence
    # eq. (9.81) in Bendat & Piersol 2000
    varcoh = 2 * uncorrected * (1 - uncorrected)**2 / numinbin
    uncoh_error = num.sqrt(varcoh)

    if correct:
        # Corrected coherence, eq. (8) in Vaughan & Nowak 1997.
        s_one = subtracted[..., :1, :]
        s_two = subtracted[..., 1:, :]
        n_one = noise[..., :1, :]
        n_two = noise[..., 1:, :]
        n_square = (s_one * n_two + n_one * s_two + n_one * n_two) / numinbin
        numerator -= n_square
        corrected = numerator / (s_one * s_two)
        return uncorrected, uncoh_error, corrected

    return uncorrected, uncoh_error
예제 #6
0
def coherence(cross_spectrum, power_spectra, noise=None, numinbin=1,
              subbed=1):

    """Compute coherence from cross and power spectra.

    uncoh, uncoh_err, corcoh = coherence(cross_spectrum, power_spectra,
                                         noise=None, numinbin=1, subbed=1)

    Inputs:

    cross_spectrum, power_spectra: Complex and real arrays, respectively.
    power_spectra has one row for each signal band.
    cross_spectrum has one fewer, and it is assumed that the cross spectra
    of all bands but the first have been taken with respect to the first.

    Will correct[1] for noise if it is supplied.

    numinbin: The number of independent estimates that have gone into
    each frequency bin.

    If subbed is true, noise has already been subtracted from
    the power spectra, and will not be during the calculations.

    Outputs:

    uncoh: The raw coherence.  Eq. (6.51) in Bendat & Piersol 2000.

    uncoh_err:  Standard deviation in the raw coherence.
    Eq. (9.81) in Bendat & Piersol 2000.

    corcoh: The corrected coherence.  Eq. (8) in Vaughan & Nowak 1997.
    Only returned when noise is supplied.

    [1] Vaughan & Nowak 1997, ApJ 474:L43"""

    cross_spectrum = num.asarray(cross_spectrum)
    power_spectra = num.asarray(power_spectra)

    # Bug fix: initialize 'correct' -- it was unbound when noise was None,
    # so the 'if correct:' tests below raised UnboundLocalError (and, with
    # subbed true, 'power_spectra + None' raised TypeError).
    correct = 0
    if noise is not None:
        correct = 1
        noise = num.asarray(noise)

    if subbed:
        if correct:
            # Noise already subtracted; add it back for the raw powers.
            unsubtracted = power_spectra + noise
            subtracted = power_spectra
        else:
            unsubtracted = power_spectra
    else:
        unsubtracted = power_spectra
        if correct:
            subtracted = power_spectra - noise

    # Row 0 is the reference band; remaining rows are the other bands.
    s_one = unsubtracted[..., :1, :]
    s_two = unsubtracted[..., 1:, :]
    numerator = num.absolute(cross_spectrum) ** 2
    uncorrected = numerator / (s_one * s_two)
    # Sometimes floating-point issues cause these limits to be violated.
    uncorrected = num.minimum(uncorrected, 1.0)
    uncorrected = num.maximum(uncorrected, 0.0)

    # error in uncorrected coherence
    # eq. (9.81) in Bendat & Piersol 2000
    varcoh = 2 * uncorrected * (1 - uncorrected) ** 2 / numinbin
    uncoh_error = num.sqrt(varcoh)

    if correct:
        # Corrected coherence, eq. (8) in Vaughan & Nowak 1997.
        s_one = subtracted[..., :1, :]
        s_two = subtracted[..., 1:, :]
        n_one = noise[..., :1, :]
        n_two = noise[..., 1:, :]
        n_square = (s_one * n_two + n_one * s_two + n_one * n_two) / numinbin
        numerator -= n_square
        corrected = numerator / (s_one * s_two)
        return uncorrected, uncoh_error, corrected

    return uncorrected, uncoh_error