def squared_distance(v1, v2):
    """
    Return the squared Euclidean distance separating two vectors.

    Args:
        v1, v2 (List): The two vectors to compare.

    Returns:
        A scalar: the sum of the squared element-wise differences
        between v1 and v2.

    Raises:
        IndexError: If either argument fails the vector validity check,
            or (from vector_subtract) if the vectors differ in length.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> squared_distance(x, y)
        8
        >>> e = [1,2,[1,2]]
        >>> squared_distance(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    if not valid.is_vector(v1) or not valid.is_vector(v2):
        raise IndexError("One of the vectors passed is not a valid vector")

    difference = vector_subtract(v1, v2)
    return sum_of_squares(difference)
def distance(v1, v2):
    """
    Return the Euclidean distance separating two vectors.

    Args:
        v1, v2 (List): The two vectors to compare.

    Returns:
        A scalar: the magnitude of the element-wise difference v1 - v2.

    Raises:
        IndexError: If either argument fails the vector validity check,
            or (from vector_subtract) if the vectors differ in length.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> distance(x, y)
        2.8284271247461903
        >>> e = [1, 2, [1, 2]]
        >>> distance(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    inputs_ok = valid.is_vector(v1) and valid.is_vector(v2)
    if not inputs_ok:
        raise IndexError("One of the vectors passed is not a valid vector")

    difference = vector_subtract(v1, v2)
    return magnitude(difference)
def mode(vector):
    """
    Return the most frequent value(s) of a vector.

    Every value that reaches the highest occurrence count is returned, so
    the result holds more than one element when there is a tie.

    Args:
        vector (List): A list vector for which a mode is sought.

    Returns:
        A List containing one or more values which are the most frequent,
        in order of first appearance. An empty list is returned for an
        empty vector (assuming the validator lets an empty list through).

    Raises:
        IndexError: If the argument fails the vector validity check.

    Examples:
        >>> x = [1, 2, 3]
        >>> mode(x)
        [1, 2, 3]
        >>> y = [1, 2, 3, 2]
        >>> mode(y)
        [2]
        >>> z = [1, 2, [3, 4]]
        >>> mode(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    counts = Counter(vector)
    # default=0 stops max() from raising a bare ValueError when there is
    # nothing to count; the comprehension below then yields [].
    max_count = max(counts.values(), default=0)
    return [x_i for x_i, count in counts.items() if count == max_count]
def mean(vector):
    """
    Return the simple arithmetic mean of a vector.

    Args:
        vector (List): A list vector containing the values to calculate
            against.

    Returns:
        A scalar Float value representing the mean of the vector.

    Raises:
        IndexError: If the argument fails the vector validity check.
        ZeroDivisionError: If the vector is empty (and the validator
            accepts an empty list), since the sum is divided by len().

    Examples:
        >>> x = [2,2,2]
        >>> mean(x)
        2.0
        >>> y = [1,2,3]
        >>> mean(y)
        2.0
        >>> z = [1, 2, 3, [1, 2]]
        >>> mean(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    return sum(vector) / len(vector)
def correlation(v_1, v_2):
    """
    Calculate the correlation between two vectors.

    The covariance of the vectors is divided by the product of their
    standard deviations. When either standard deviation is not positive
    the correlation is reported as 0.0.

    Args:
        v_1, v_2 (List): List vectors of the same length.

    Returns:
        A scalar Float value representing the correlation.

    Raises:
        IndexError: If the vectors differ in length, if either fails the
            vector validity check, or (from standard_deviation) if they
            hold fewer than two values.
        TypeError: If either vector contains a string.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> correlation(x, y)
        -1.0
        >>> z = [1, 2]
        >>> correlation(x, z)
        Traceback (most recent call last):
            ...
        IndexError: The two vectors must be the same length.
        >>> zz = ['a', 'b', 'c']
        >>> correlation(x, zz)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> zzz = [1, 2, [1, 2]]
        >>> correlation(x, zzz)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if len(v_1) != len(v_2):
        raise IndexError("The two vectors must be the same length.")
    if not (valid.is_vector(v_1) and valid.is_vector(v_2)):
        raise IndexError("The vector passed is not a valid vector")

    # Scan both vectors in place; no need to build a merged copy first.
    if any(isinstance(i, str) for v in (v_1, v_2) for i in v):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    sd_v_1 = standard_deviation(v_1)
    sd_v_2 = standard_deviation(v_2)
    if sd_v_1 > 0 and sd_v_2 > 0:
        return covariance(v_1, v_2) / sd_v_1 / sd_v_2
    # No variation in at least one vector: return a float so the result
    # type matches the branch above.
    return 0.0
def covariance(v_1, v_2):
    """
    Calculate the covariance of two vectors.

    The dot product of the two mean-centred vectors is divided by
    (n - 1), where n is the shared length of the vectors.

    Args:
        v_1, v_2 (List): List vectors of the same length, holding at
            least two values each.

    Returns:
        A scalar value of the covariance.

    Raises:
        IndexError: If the vectors differ in length, if either fails the
            vector validity check, or if they hold fewer than two values.
        TypeError: If either vector contains a string.

    Examples:
        >>> x = [1, 2, 3]
        >>> y = [3, 2, 1]
        >>> covariance(x, y)
        -1.0
        >>> z = [1, 2]
        >>> covariance(x, z)
        Traceback (most recent call last):
            ...
        IndexError: The two vectors must be the same length.
        >>> zz = ['a', 'b', 'c']
        >>> covariance(x, zz)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> zzz = [1, 2, [1, 2]]
        >>> covariance(x, zzz)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if len(v_1) != len(v_2):
        raise IndexError("The two vectors must be the same length.")
    if not (valid.is_vector(v_1) and valid.is_vector(v_2)):
        raise IndexError("The vector passed is not a valid vector")

    # Scan both vectors in place; no need to build a merged copy first.
    if any(isinstance(i, str) for v in (v_1, v_2) for i in v):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    num_elements = len(v_1)
    # The (n - 1) divisor below is zero for a single observation; fail
    # with an explicit message instead of a bare ZeroDivisionError.
    if num_elements < 2:
        raise IndexError("The vectors must contain at least two values")
    return dot(de_mean(v_1), de_mean(v_2)) / (num_elements - 1)
def variance(vector):
    """
    Calculate the variance of a vector with length >= 2.

    The sum of squared deviations from the mean is divided by (n - 1).

    Args:
        vector (List): A vector containing 2 or more Int or Float values.

    Returns:
        A scalar representing the variance.

    Raises:
        IndexError: If the argument fails the vector validity check or
            holds fewer than two values.
        TypeError: If the vector contains a string.

    Examples:
        >>> x = [1, 2, 3]
        >>> variance(x)
        1.0
        >>> y = ['a', 'b', 'c']
        >>> variance(y)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> z = [1, 2, [1, 2]]
        >>> variance(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
        >>> zz = [1]
        >>> variance(zz)
        Traceback (most recent call last):
            ...
        IndexError: The vector must contain at least two values
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)
    if len(vector) < 2:
        raise IndexError("The vector must contain at least two values")

    num_elements = len(vector)
    deviations = de_mean(vector)
    return sum_of_squares(deviations) / (num_elements - 1)
def scalar_multiply(v, sc):
    """
    Scale every element of a vector by a scalar.

    Args:
        v (List): List representing the vector.
        sc (int or float): Scalar to multiply by.

    Returns:
        A new vector of the same length as v, where each element has
        been multiplied by sc.

    Raises:
        IndexError: If v fails the vector validity check.

    Examples:
        >>> scalar_multiply([1, 2, 3], 1)
        [1, 2, 3]
        >>> scalar_multiply([1, 2, 3], 2)
        [2, 4, 6]
        >>> scalar_multiply([1, 2, [1, 2]], 1)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if valid.is_vector(v):
        return [component * sc for component in v]
    raise IndexError("The vector passed is not a valid vector")
def sum_of_squares(v):
    """
    Return the sum of the squared elements of a vector.

    The value is obtained as the dot product of the vector with itself.

    Args:
        v (List): The vector whose sum of squares is wanted.

    Returns:
        A scalar of the sum of squares of the vector.

    Raises:
        IndexError: If v fails the vector validity check.

    Examples:
        >>> x = [1, 2, 3]
        >>> sum_of_squares(x)
        14
        >>> e = [1, 2, [1, 2]]
        >>> sum_of_squares(e)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if valid.is_vector(v):
        return dot(v, v)
    raise IndexError("The vector passed is not a valid vector")
def magnitude(v):
    """
    Return the magnitude (length) of a vector.

    The magnitude is the square root of the vector's sum of squares.

    Args:
        v (List): The vector whose magnitude is wanted.

    Returns:
        A scalar of the magnitude of the vector.

    Raises:
        IndexError: If v fails the vector validity check.

    Examples:
        >>> x = [1, 2, 3]
        >>> magnitude(x)
        3.7416573867739413
        >>> e = [1, 2, [1, 2]]
        >>> magnitude(e)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    vector_ok = valid.is_vector(v)
    if not vector_ok:
        raise IndexError("The vector passed is not a valid vector")

    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)
def quantile(vector, percentile):
    """
    Return the desired percentile value of a vector.

    The vector is sorted and the element at index
    int(percentile * len(vector)) is returned.

    Args:
        vector (List): A list vector containing the values to calculate
            against.
        percentile (Float): A value of the sought percentile such that
            (0.0 < value < 1.0)

    Returns:
        A scalar value from the target vector, which represents the
        value less than which a certain percentile of the data lies.

    Raises:
        ValueError: If percentile is not strictly between 0.0 and 1.0.
            This is checked before the vector is examined.
        IndexError: If the argument fails the vector validity check, or
            if the vector is empty (no element exists at the index).

    Examples:
        >>> x = [*range(10)]
        >>> quantile(x, 0.30)
        3
        >>> quantile(x, 0.90)
        9
        >>> quantile(x, 1.0)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        ValueError: The percentile must be a Float such that
        (0.0 < percentile < 1.0)
        >>> z = [1, 2, 3, [1, 2, 3]]
        >>> quantile(z, 0.50)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not 0.0 < percentile < 1.0:
        message = ('The percentile must be a Float such that ' +
                   '(0.0 < percentile < 1.0)')
        raise ValueError(message)
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    # Truncation keeps the index within [0, len - 1] because percentile
    # is strictly below 1.0.
    p_index = int(percentile * len(vector))
    return sorted(vector)[p_index]
def standard_deviation(vector):
    """
    Compute the standard deviation of a vector.

    The result is the square root of the vector's variance.

    Args:
        vector (List): A vector containing 2 or more Int or Float values.

    Returns:
        A scalar value representing the standard deviation.

    Raises:
        IndexError: If the argument fails the vector validity check or
            holds fewer than two values.
        TypeError: If the vector contains a string.

    Examples:
        >>> x = [1, 2, 3]
        >>> standard_deviation(x)
        1.0
        >>> y = ['a', 'b', 'c']
        >>> standard_deviation(y)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> z = [1, 2, [1, 2]]
        >>> standard_deviation(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
        >>> zz = [1]
        >>> standard_deviation(zz)
        Traceback (most recent call last):
            ...
        IndexError: The vector must contain at least two values
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)
    if len(vector) < 2:
        raise IndexError("The vector must contain at least two values")
    return math.sqrt(variance(vector))
def median(vector):
    """
    Return the median (central value) of a vector.

    For an odd number of elements the middle element of the sorted
    vector is returned as a float; for an even number the mean of the
    two middle elements is returned.

    Args:
        vector (List): A list vector containing the values to calculate
            against.

    Returns:
        A scalar Float value representing the median of the vector.

    Raises:
        IndexError: If the argument fails the vector validity check, or
            if the vector is empty (there is no middle element to read).

    Examples:
        >>> w = [2, 2, 2]
        >>> median(w)
        2.0
        >>> x = [1, 2, 5]
        >>> median(x)
        2.0
        >>> y = [1, 2, 3, 4]
        >>> median(y)
        2.5
        >>> z = [1, 2, 3, [1, 2]]
        >>> median(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")

    elements = len(vector)
    sorted_vector = sorted(vector)
    midpoint = elements // 2

    if elements % 2 == 1:
        return float(sorted_vector[midpoint])

    # Even length: average the two values that straddle the centre.
    low = midpoint - 1
    high = midpoint
    return mean([sorted_vector[low], sorted_vector[high]])
def vector_subtract(v1, v2):
    """
    Subtract two equal-length vectors (lists) element by element.

    Args:
        v1, v2 (List): Lists of the same length representing the vectors.

    Returns:
        A new vector of the same length as v1 and v2, where each element
        is the element of v1 minus the corresponding element of v2.

    Raises:
        IndexError: If either argument fails the vector validity check,
            or if the vectors differ in length.

    Examples:
        >>> w = [3, 2, 1]
        >>> x = [1, 2, 3]
        >>> vector_subtract(w, x)
        [2, 0, -2]
        >>> y = [1, 2, 3]
        >>> vector_subtract(x, y)
        [0, 0, 0]
        >>> z = [1, 3]
        >>> vector_subtract(x, z)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length
        >>> e = [1,2,[1,2]]
        >>> vector_subtract(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    both_valid = valid.is_vector(v1) and valid.is_vector(v2)
    if not both_valid:
        raise IndexError("One of the vectors passed is not a valid vector")

    if not len(v1) == len(v2):
        raise IndexError('Vectors must be the same length')

    return [left - right for left, right in zip(v1, v2)]
def data_range(vector):
    """
    Return max(vector) - min(vector).

    Only works for vectors containing Int and Float values. Errors on
    character vectors.

    Args:
        vector (List): A vector to calculate against.

    Returns:
        A scalar value of the same type as the vector.

    Raises:
        IndexError: If the argument fails the vector validity check.
        TypeError: If the vector contains a string.
        ValueError: If the vector is empty (and the validator accepts an
            empty list), raised by max().

    Examples:
        >>> w = [1, 2, 3]
        >>> data_range(w)
        2
        >>> x = [1.0, 2.0, 3.0]
        >>> data_range(x)
        2.0
        >>> y = ['a', 'b', 'c']
        >>> data_range(y)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> z = [1, 2, [1, 2]]
        >>> data_range(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)
    return max(vector) - min(vector)
def de_mean(vector):
    """
    Translate the vector by subtracting its mean, so the result has
    mean=0.

    Args:
        vector (List): a valid vector

    Returns:
        A vector (List), centered on 0.

    Raises:
        IndexError: If the argument fails the vector validity check.
        TypeError: If the vector contains a string.

    Examples:
        >>> x = [1, 2, 3]
        >>> de_mean(x)
        [-1.0, 0.0, 1.0]
        >>> y = ['a', 'b', 'c']
        >>> de_mean(y)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> z = [1, 2, [1, 2]]
        >>> de_mean(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)

    x_bar = mean(vector)
    return [x_i - x_bar for x_i in vector]
def dot(v1, v2):
    """
    Return the dot product of two equal-length vectors.

    Args:
        v1, v2 (List): The vectors to multiply. Must be the same length.

    Returns:
        A scalar: the sum of the products of corresponding elements.

    Raises:
        IndexError: If either argument fails the vector validity check,
            or if the vectors differ in length.

    Examples:
        >>> w = [1, 2, 3]
        >>> x = [1, 2, 3]
        >>> dot(w, x)
        14
        >>> z = [1, 2]
        >>> dot(x, z)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length
        >>> e = [1, 2, [1, 2]]
        >>> dot(x, e)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    if not valid.is_vector(v1) or not valid.is_vector(v2):
        raise IndexError("One of the vectors passed is not a valid vector")

    if len(v1) != len(v2):
        raise IndexError('Vectors must be the same length')

    products = (left * right for left, right in zip(v1, v2))
    return sum(products)
def interquartile_range(vector):
    """
    Calculate the difference between the 75th and 25th percentile.

    Args:
        vector (List): A numeric vector.

    Returns:
        A scalar value representing the interquartile range.

    Raises:
        IndexError: If the argument fails the vector validity check.
        TypeError: If the vector contains a string.

    Examples:
        >>> x = [*range(10)]
        >>> interquartile_range(x)
        5
        >>> y = ['a', 'b', 'c']
        >>> interquartile_range(y)  # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
            ...
        TypeError: The vector passed must contains either
        Int or Float values.
        >>> z = [1, 2, [1, 2]]
        >>> interquartile_range(z)
        Traceback (most recent call last):
            ...
        IndexError: The vector passed is not a valid vector
    """
    if not valid.is_vector(vector):
        raise IndexError("The vector passed is not a valid vector")
    if any(isinstance(i, str) for i in vector):
        message = ("The vector passed must contains either " +
                   "Int or Float values.")
        raise TypeError(message)
    return quantile(vector, 0.75) - quantile(vector, 0.25)
def vector_sum(vectors):
    """
    Add together the corresponding elements of several equal-length
    vectors (lists).

    The vectors are combined pairwise, left to right, with vector_add.

    Args:
        vectors (List): Lists of the same length representing the
            vectors.

    Returns:
        A vector of the same length as all of the vectors, where the
        corresponding elements have been summed.

    Raises:
        IndexError: If any entry fails the vector validity check, if the
            vectors differ in length, or if vectors is empty (there is
            no first vector to measure).

    Examples:
        >>> w = [1, 2, 3]
        >>> x = [1, 2, 3]
        >>> y = [1, 2, 3]
        >>> myVecs = [w, x, y]
        >>> vector_sum(myVecs)
        [3, 6, 9]
        >>> z = [1, 3]
        >>> myVecs = [w, x, y, z]
        >>> vector_sum(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length
        >>> e = [1, 2, [1, 2]]
        >>> vector_sum([w, x, y, e])
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    for candidate in vectors:
        if not valid.is_vector(candidate):
            raise IndexError(
                "One of the vectors passed is not a valid vector")

    # Every later vector must match the length of the first one.
    expected_length = len(vectors[0])
    for other in vectors[1:]:
        if len(other) != expected_length:
            raise IndexError('Vectors must be the same length')

    return reduce(vector_add, vectors)
def vector_mean(vectors):
    """
    Return the element-wise mean of several equal-length vectors.

    The ith element of the result is the mean of the ith elements of the
    input vectors, computed as their element-wise sum scaled by 1/n.

    Args:
        vectors (List): List containing all of the vectors to be
            computed against.

    Returns:
        A new vector of the same length as all of the vectors, where the
        corresponding elements have had a mean calculated on them.

    Raises:
        IndexError: If any entry fails the vector validity check, or
            (from vector_sum) if the vectors differ in length.

    Examples:
        >>> myVecs = [ [1, 2, 3], [1, 2, 3], [1, 2, 3] ]
        >>> vector_mean(myVecs)
        [1.0, 2.0, 3.0]
        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [3, 2, 1] ]
        >>> vector_mean(myVecs)
        [2.0, 2.0, 2.0]
        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [2, 1] ]
        >>> vector_mean(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: Vectors must be the same length
        >>> myVecs = [ [1, 2, 3], [2, 2, 2], [2, 1, [1, 2] ] ]
        >>> vector_mean(myVecs)
        Traceback (most recent call last):
            ...
        IndexError: One of the vectors passed is not a valid vector
    """
    for candidate in vectors:
        if not valid.is_vector(candidate):
            raise IndexError(
                "One of the vectors passed is not a valid vector")

    n = len(vectors)
    totals = vector_sum(vectors)
    return scalar_multiply(totals, 1/n)