Ejemplo n.º 1
0
def Sokal_Michener(original, test):
    """
    Sokal and Michener Distance is distance measure for nominal or ordinal
    data.

    Given 2 lists (original and test), calculates the Sokal and Michener
    Distance based on the formula,

    1 - [(number of positions where both lists hold the same value) /
    (total number of positions compared)]

    @see: Sokal RR, Michener CD (1958) A statistical method for evaluating
    systematic relationships. Univ Kansas Sci Bull 38:1409-1438

    @param original: list of original data
    @param test: list of data to test against original
    @return: proportion of positions whose values differ, as a float
    between 0.0 (all positions match) and 1.0 (no position matches);
    0.0 when both inputs are empty
    @raise DistanceInputSizeError: if the inputs differ in length
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
                equal for Sokal & Michener's distance")
    total = len(original)
    if total == 0:
        # Nothing to compare: no position differs, and this avoids 0/0.
        return 0.0
    matches = sum(1 for a, b in zip(original, test) if a == b)
    # float() keeps the ratio from being truncated by integer division.
    return 1 - (matches / float(total))
Ejemplo n.º 2
0
def Bray_Curtis(original, test):
    """
    Bray-Curtis Distance is distance measure for interval or ratio data.

    Computed as the Manhattan distance between the two inputs divided by
    the combined summation of both inputs.

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland forest
    communities of S. Winconsin. Ecological Monographs 27: 325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Manhattan(original, test) divided by
    (summation(original) + summation(test)), as a float
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if the combined summation of the inputs is zero
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Bray-Curtis distance")
    denominator = summation(original) + summation(test)
    # Force true division: with integer inputs a plain `/` truncates the
    # ratio (usually to 0) on interpreters where `/` is integer division.
    return Manhattan(original, test) / float(denominator)
Ejemplo n.º 3
0
def Canberra(original, test):
    """
    Canberra Distance is distance measure for interval or ratio data.

    Each pair of elements contributes abs(a - b) / abs(a + b) to the
    distance. A pair in which both elements are zero (a 0/0 term)
    contributes nothing.

    @see: Lance GN and Williams WT. 1966. Computer programs for hierarchical
    polythetic classification. Computer Journal 9: 60-64.

    @param original: list of original data
    @param test: list of data to test against original
    @return: sum of the per-element ratios, as a float
    @raise DistanceInputSizeError: if the inputs differ in length
    @raise ZeroDivisionError: if a pair of differing elements sums to zero
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Canberra distance")
    total = 0.0
    for a, b in zip(original, test):
        numerator = abs(a - b)
        denominator = abs(a + b)
        if numerator == 0 and denominator == 0:
            # Both elements are zero: treat the 0/0 term as 0 instead of
            # failing with a division by zero.
            continue
        # float() prevents integer inputs from truncating each ratio.
        total += numerator / float(denominator)
    return total
Ejemplo n.º 4
0
def Manhattan(original, test):
    """
    Manhattan Distance (also called City Block Distance) is a distance
    measure for interval or ratio data.

    The result is the sum, over every position, of the absolute difference
    between the corresponding elements of the two inputs.

    @see: Krause, Eugene F. 1987. Taxicab Geometry. Dover. ISBN 0-486-25202-7.

    @param original: list of original data
    @param test: list of data to test against original
    @return: accumulated absolute difference (0 for empty inputs)
    @raise DistanceInputSizeError: if the inputs differ in length
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Manhattan distance")
    distance = 0
    for left, right in zip(original, test):
        distance += abs(left - right)
    return distance
Ejemplo n.º 5
0
def Minkowski(original, test, power=3):
    """
    Minkowski Distance is a distance measure for interval or ratio data.

    It generalizes Euclidean Distance: the absolute element-wise
    differences are raised to `power`, accumulated, and the `power`-th
    root of the total is returned. With power = 2 the result equals the
    Euclidean Distance.

    @param original: list of original data
    @param test: list of data to test against original
    @param power: exponent applied to each absolute difference (default 3)
    @type power: integer
    @return: the `power`-th root of the accumulated powered differences
    @raise DistanceInputSizeError: if the inputs differ in length
    """
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Minkowski distance")
    accumulated = 0
    for left, right in zip(original, test):
        accumulated += abs(left - right)**power
    root_exponent = 1 / float(power)
    return accumulated**root_exponent
Ejemplo n.º 6
0
def Hamming(original, test):
    """
    Hamming Distance is distance measure for ordinal data - only for ordered
    data.

    Given 2 lists (original and test), calculates the Hamming Distance by
    counting the number of positions at which the 2 lists differ.

    @param original: list of original data
    @param test: list of data to test against original
    @return: number of positions whose elements are not equal
    @raise DistanceInputSizeError: if the inputs differ in length
    """
    # `!=` replaces the `<>` operator, which is a syntax error on Python 3.
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Hamming's distance")
    return sum(1 for a, b in zip(original, test) if a != b)
Ejemplo n.º 7
0
def Euclidean(original, test):
    """
    Euclidean Distance is distance measure for interval or ratio data.

    euclidean(original, test) -> euclidean distance between original and test
    Adapted from BioPython

    @param original: list of original data
    @param test: list of data to test against original
    @return: square root of the summed squared element-wise differences
    @raise DistanceInputSizeError: if the inputs differ in length
    """
    # lightly modified from implementation by Thomas Sicheritz-Ponten.
    # This works faster than the Numeric implementation on shorter
    # vectors.
    # The length check previously referenced the undefined name `tst`,
    # which made every call fail with NameError.
    if len(original) != len(test):
        raise DistanceInputSizeError("Size (length) of inputs must be \
            equal for Euclidean distance")
    squared_total = 0
    for a, b in zip(original, test):
        squared_total += (a - b)**2
    return math.sqrt(squared_total)