Ejemplo n.º 1
0
def Bray_Curtis(original, test):
    """
    Complement of the Bray and Curtis coefficient for interval or
    ratio data.

    The result is one minus the ratio of the Manhattan distance
    between the two inputs to the combined total of all their
    elements. When the Manhattan distance is zero (and the combined
    total is not), the result is 1.0.

    Coefficient:
    M{1 - S{sum}(abs((A + B)(i) - (A + C)(i))) /
    (S{sum}((A + B)(i)) + S{sum}((A + C)(i)))}

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland
    forest communities of S. Wisconsin. Ecological Monographs 27:
    325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: one minus (Manhattan distance / combined total)
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if the combined total of both inputs
        is zero

    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Bray-Curtis distance")
    difference = Manhattan(original, test)
    # float() keeps the ratio in floating point whatever the element
    # types of the inputs are.
    combined_total = float(sum(original) + sum(test))
    ratio = difference / combined_total
    return 1 - ratio
Ejemplo n.º 2
0
def Manhattan(original, test):
    """
    Manhattan coefficient for interval or ratio data.

    Coefficient: M{S{sum}(abs((A + B)(i) - (A + C)(i)))}

    Also known as City Block Distance: the absolute differences
    between elements at the same position are added together.

    @see: Krause, Eugene F. 1987. Taxicab Geometry. Dover. ISBN 0-486-
    25202-7.

    @param original: list of original data
    @param test: list of data to test against original
    @return: sum of the position-wise absolute differences, as a float
    @raise DistanceInputSizeError: if the inputs differ in length

    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Manhattan distance")
    # Lengths are equal at this point, so zip() visits every position.
    differences = (abs(a - b) for a, b in zip(original, test))
    return float(sum(differences))
Ejemplo n.º 3
0
def Euclidean(original, test):
    """
    Euclidean coefficient for interval or ratio data.

    Coefficient: M{sqrt(S{sum}(((A + B)(i) - (A + C)(i)) ^ 2))}

    Returns the square root of the summed squared differences between
    elements at the same position. Adapted from BioPython.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Euclidean distance between original and test, as a float
    @raise DistanceInputSizeError: if the inputs differ in length

    @status: Tested function
    @since: version 0.1
    """
    # Derived from an implementation by Thomas Sicheritz-Ponten.
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Euclidean distance")
    squared = ((a - b) ** 2 for a, b in zip(original, test))
    return math.sqrt(sum(squared))
Ejemplo n.º 4
0
def Cosine(original, test):
    """
    Cosine coefficient for interval or ratio data.

    Coefficient (as computed here):
    M{S{sum}((A + B)(i) * (A + C)(i)) /
    (sqrt(S{sum}((A + B)(i) ^ 2)) * sqrt(S{sum}((A + C)(i) ^ 2)))}

    Every element is converted to float before any arithmetic.

    @param original: list of original data
    @param test: list of data to test against original
    @return: dot product of the inputs divided by the product of
        their Euclidean norms
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if either input has a zero norm"""
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Cosine distance")
    left = [float(value) for value in original]
    right = [float(value) for value in test]
    dot_product = sum(p * q for p, q in zip(left, right))
    left_norm = sum(p * p for p in left) ** 0.5
    right_norm = sum(q * q for q in right) ** 0.5
    return dot_product / (left_norm * right_norm)
Ejemplo n.º 5
0
def Hamming(original, test):
    """
    Hamming coefficient for ordinal data - only for positional data.

    Coefficient: number of mismatches with respect to position

    @param original: list of original data
    @param test: list of data to test against original
    @return: number of positions at which the two inputs hold
        unequal elements, as an integer
    @raise DistanceInputSizeError: if the inputs differ in length

    @see: Ling, MHT. 2010. COPADS, I: Distances Measures between Two
    Lists or Sets. The Python Papers Source Codes 2:2.
    """
    # "!=" is used rather than "<>": both mean the same in Python 2,
    # but "<>" is a syntax error in Python 3.
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Hamming's distance")
    return sum(1 for a, b in zip(original, test) if a != b)
Ejemplo n.º 6
0
def Canberra(original, test):
    """
    Canberra coefficient for interval or ratio data.

    Coefficient:
    M{S{sum}(abs((A + B)(i) - (A + C)(i)) / abs((A + B)(i) + (A + C)(i)))}

    A position where both elements are zero would give 0 / 0; by the
    usual convention for this measure such a term contributes nothing
    to the total. Each term is computed with floating point division,
    so integer inputs are not truncated.

    @see: Lance GN and Williams WT. 1966. Computer programs for
    hierarchical polythetic classification. Computer Journal 9: 60-64.

    @param original: list of original data
    @param test: list of data to test against original
    @return: sum of the per-position ratios, as a float
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if a pair of elements sums to zero while
        the elements themselves differ (e.g. 2 and -2)"""
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Canberra distance")
    total = 0.0
    for a, b in zip(original, test):
        numerator = abs(a - b)
        denominator = abs(a + b)
        if numerator == 0 and denominator == 0:
            # Both elements are zero: treat the 0 / 0 term as 0.
            continue
        # float() prevents integer (floor) division on integer inputs.
        total += numerator / float(denominator)
    return total
Ejemplo n.º 7
0
def Minkowski(original, test, power=3):
    """
    Minkowski coefficient for interval or ratio data.

    Coefficient:
    M{power-th root(S{sum}(abs((A + B)(i) - (A + C)(i)) ^ power))}

    Generalized absolute form of Euclidean Distance: with power = 2
    the result equals the Euclidean Distance.

    @param original: list of original data
    @param test: list of data to test against original
    @param power: exponent applied to each absolute difference; the
        summed result is raised to 1 / power
    @type power: integer
    @return: the power-th root of the summed powered differences
    @raise DistanceInputSizeError: if the inputs differ in length"""
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Minkowski distance")
    powered = (abs(a - b) ** power for a, b in zip(original, test))
    accumulated = sum(powered)
    # float() keeps 1 / power from collapsing to integer division.
    root = 1 / float(power)
    return accumulated ** root
Ejemplo n.º 8
0
def Sokal_Michener(original, test, absent=0, type='Set'):
    """
    Sokal and Michener coefficient for nominal or ordinal data.

    Coefficient: M{(A + D) / (A + B + C + D)}

    The four terms come from compare(), which is defined elsewhere.
    NOTE(review): from the way its result is unpacked, compare() is
    presumed to return the counts (original only, test only, both,
    neither) - confirm against its definition.

    @param original: list of original data
    @param test: list of data to test against original
    @param absent: user-defined identifier for absent of region,
        default = 0
    @param type: {Set | List}, define whether use Set comparison
        (non-positional) or list comparison (positional), default = Set
    @return: (both + none) divided by the sum of all four terms, as a
        float
    @raise DistanceInputSizeError: if the inputs differ in length

    @see: Ling, MHT. 2010. COPADS, I: Distances Measures between Two
    Lists or Sets. The Python Papers Source Codes 2:2.
    """
    # "!=" is used rather than "<>": both mean the same in Python 2,
    # but "<>" is a syntax error in Python 3.
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
                equal for Sokal & Michener's distance")
    (original_only, test_only, both, none) = compare(original, test,
                                                     absent, type)
    agreement = both + none
    # float() prevents integer (floor) division if the terms are ints.
    return agreement / float(original_only + test_only + both + none)
Ejemplo n.º 9
0
def Tanimoto(original, test):
    """
    Tanimoto coefficient for interval or ratio data.

    Coefficient (as computed here):
    M{S{sum}((A + B)(i) * (A + C)(i)) /
    (S{sum}((A + B)(i) ^ 2) + S{sum}((A + C)(i) ^ 2) -
    S{sum}((A + B)(i) * (A + C)(i)))}

    Every element is converted to float before any arithmetic.

    @param original: list of original data
    @param test: list of data to test against original
    @return: dot product of the inputs divided by (sum of squares of
        original + sum of squares of test - dot product), as a float
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if the denominator is zero (for example
        when both inputs contain only zeros)

    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        # The message names this measure; it previously said "Cosine".
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Tanimoto distance")
    left = [float(value) for value in original]
    right = [float(value) for value in test]
    dot_product = sum(p * q for p, q in zip(left, right))
    left_squares = sum(p * p for p in left)
    right_squares = sum(q * q for q in right)
    denominator = left_squares + right_squares - dot_product
    return dot_product / denominator