def Bray_Curtis(original, test):
    """
    Complement of the Bray and Curtis coefficient for interval or ratio
    data.

    The Bray and Curtis coefficient itself is 0 (its lower boundary) for
    complete similarity (no difference). This function returns the
    complement, so identical inputs with a non-zero total give 1.0.

    Coefficient: M{1 - S{sum}(abs((A + B)(i) - (A + C)(i))) /
    (S{sum}((A + B)(i)) + S{sum}((A + C)(i)))}

    @see: Bray JR and Curtis JT. 1957. An ordination of the upland forest
    communities of S. Wisconsin. Ecological Monographs 27: 325-349.

    @param original: list of original data
    @param test: list of data to test against original
    @return: 1 minus (Manhattan distance divided by the sum of all values
        in both lists)
    @raise DistanceInputSizeError: if the two inputs differ in length
    @raise ZeroDivisionError: if the values of both inputs sum to zero
        (this includes two empty lists)
    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Bray-Curtis distance")
    distance = Manhattan(original, test)
    # float() keeps the division a true division on Python 2 as well.
    total = float(sum(original) + sum(test))
    return 1 - (distance / total)
def Manhattan(original, test):
    """
    Manhattan coefficient for interval or ratio data.

    Coefficient: M{S{sum}(abs((A + B)(i) - (A + C)(i)))}

    Manhattan Distance is also known as City Block Distance. It is
    essentially the summation of the absolute difference between each
    pair of elements at the same position.

    @see: Krause, Eugene F. 1987. Taxicab Geometry. Dover.
    ISBN 0-486-25202-7.

    @param original: list of original data
    @param test: list of data to test against original
    @return: sum of absolute element-wise differences, as a float
        (0.0 for two empty lists)
    @raise DistanceInputSizeError: if the two inputs differ in length
    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Manhattan distance")
    # Use the builtin sum() instead of shadowing it with a local counter.
    return float(sum(abs(a - b) for a, b in zip(original, test)))
def Euclidean(original, test):
    """
    Euclidean coefficient for interval or ratio data.

    Coefficient: M{sqrt(S{sum}(((A + B)(i) - (A + C)(i)) ^ 2))}

    Euclidean(original, test) -> euclidean distance between original and
    test. Adapted from BioPython (lightly modified from the implementation
    by Thomas Sicheritz-Ponten).

    @param original: list of original data
    @param test: list of data to test against original
    @return: square root of the sum of squared element-wise differences
        (0.0 for two empty lists)
    @raise DistanceInputSizeError: if the two inputs differ in length
    @status: Tested function
    @since: version 0.1
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Euclidean distance")
    # Use the builtin sum() instead of shadowing it with a local counter.
    squared = sum((a - b) ** 2 for a, b in zip(original, test))
    return math.sqrt(squared)
def Cosine(original, test):
    """
    Cosine coefficient for interval or ratio data.

    Coefficient: M{S{sum}((A + B)(i) * (A + C)(i)) /
    (sqrt(S{sum}((A + B) ^ 2)) * sqrt(S{sum}((A + C) ^ 2)))}

    That is, the dot product of the two lists divided by the product of
    their Euclidean norms. All values are converted to float first.

    @param original: list of original data
    @param test: list of data to test against original
    @return: cosine of the angle between the two lists, as a float
    @raise DistanceInputSizeError: if the two inputs differ in length
    @raise ZeroDivisionError: if either input has zero norm (all zeros
        or empty)
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Cosine distance")
    left = [float(x) for x in original]
    right = [float(x) for x in test]
    dot_product = sum(a * b for a, b in zip(left, right))
    left_norm = sum(a * a for a in left) ** 0.5
    right_norm = sum(b * b for b in right) ** 0.5
    return dot_product / (left_norm * right_norm)
def Hamming(original, test):
    """
    Hamming coefficient for ordinal data - only for positional data.

    Coefficient: number of mismatches with respect to position

    @param original: list of original data
    @param test: list of data to test against original
    @return: number of positions at which the two inputs differ
        (0 for two empty inputs)
    @raise DistanceInputSizeError: if the two inputs differ in length

    @see: Ling, MHT. 2010. COPADS, I: Distances Measures between Two
    Lists or Sets. The Python Papers Source Codes 2:2.
    """
    # "!=" replaces the "<>" operator, which is a syntax error on
    # Python 3 and is spelled "!=" in the other distance functions.
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Hamming's distance")
    return sum(1 for a, b in zip(original, test) if a != b)
def Canberra(original, test):
    """
    Canberra coefficient for interval or ratio data.

    Coefficient: M{S{sum}(abs((A + B)(i) - (A + C)(i)) /
    abs((A + B)(i) + (A + C)(i)))}

    A position where both values are zero (a 0/0 term) contributes 0 to
    the sum, following the usual convention for Canberra distance.

    @see: Lance GN and Williams WT. 1966. Computer programs for
    hierarchical polythetic classification. Computer Journal 9: 60-64.

    @param original: list of original data
    @param test: list of data to test against original
    @return: sum of the per-position ratios, as a float
        (0.0 for two empty lists)
    @raise DistanceInputSizeError: if the two inputs differ in length
    @raise ZeroDivisionError: if a pair of non-zero values sums to zero
        (for example 2 and -2)
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Canberra distance")
    total = 0.0
    for a, b in zip(original, test):
        difference = abs(a - b)
        magnitude = abs(a + b)
        if difference == 0 and magnitude == 0:
            # Both values are zero: treat the 0/0 term as 0.
            continue
        # float() forces true division; on Python 2 an int / int ratio
        # would otherwise be truncated towards zero.
        total += difference / float(magnitude)
    return total
def Minkowski(original, test, power=3):
    """
    Minkowski coefficient for interval or ratio data.

    Coefficient: M{power-th root(S{sum}(abs((A + B)(i) - (A + C)(i)) ^
    power))}

    Minkowski Distance is a generalized absolute form of Euclidean
    Distance. Minkowski Distance = Euclidean Distance when power = 2 and
    Manhattan Distance when power = 1.

    @param original: list of original data
    @param test: list of data to test against original
    @param power: exponential variable, default = 3
    @type power: integer
    @return: power-th root of the sum of absolute differences raised to
        power
    @raise DistanceInputSizeError: if the two inputs differ in length
    @raise ZeroDivisionError: if power is 0
    """
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Minkowski distance")
    # Use the builtin sum() instead of shadowing it with a local counter.
    total = sum(abs(a - b) ** power for a, b in zip(original, test))
    # float() keeps 1 / power a true division on Python 2 as well.
    return total ** (1 / float(power))
def Sokal_Michener(original, test, absent=0, type='Set'):
    """
    Sokal and Michener coefficient for nominal or ordinal data.

    Coefficient: M{(A + D) / (A + B + C + D)}

    @param original: list of original data
    @param test: list of data to test against original
    @param absent: user-defined identifier for absent of region,
        default = 0
    @param type: {Set | List}, define whether use Set comparison
        (non-positional) or list comparison (positional), default = Set
    @return: (both + none) divided by the sum of all four values returned
        by compare(), as a float
    @raise DistanceInputSizeError: if the two inputs differ in length

    @see: Ling, MHT. 2010. COPADS, I: Distances Measures between Two
    Lists or Sets. The Python Papers Source Codes 2:2.
    """
    # "!=" replaces the "<>" operator, which is a syntax error on
    # Python 3 and is spelled "!=" in the other distance functions.
    if len(original) != len(test):
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Sokal & Michener's distance")
    # NOTE(review): compare() is defined elsewhere; its four results are
    # presumably counts of regions found in original only, in test only,
    # in both and in neither - confirm against compare().
    (in_original, in_test, both, none) = compare(original, test, absent,
                                                 type)
    # float() guards against integer truncation on Python 2 should
    # compare() return integer counts.
    return (both + none) / float(in_original + in_test + both + none)
def Tanimoto(original, test):
    """
    Tanimoto coefficient for interval or ratio data.

    Coefficient: M{S{sum}((A + B)(i) * (A + C)(i)) /
    (S{sum}((A + B) ^ 2) + S{sum}((A + C) ^ 2) -
    S{sum}((A + B)(i) * (A + C)(i)))}

    That is, the dot product of the two lists divided by (sum of squares
    of original + sum of squares of test - dot product). All values are
    converted to float first.

    @param original: list of original data
    @param test: list of data to test against original
    @return: Tanimoto coefficient as a float (1.0 for identical non-zero
        inputs)
    @raise DistanceInputSizeError: if the two inputs differ in length
    @raise ZeroDivisionError: if the denominator is zero (for example
        when both inputs are all zeros or empty)
    @status: Tested function
    @since: version 0.4
    """
    if len(original) != len(test):
        # The message previously named "Cosine distance" (copy-paste slip).
        raise DistanceInputSizeError(
            "Size (length) of inputs must be equal for Tanimoto distance")
    left = [float(x) for x in original]
    right = [float(x) for x in test]
    dot_product = sum(a * b for a, b in zip(left, right))
    squares = sum(a * a for a in left) + sum(b * b for b in right)
    return dot_product / (squares - dot_product)