def test_unequal_length_in_list():
    """
    Check that get_distance raises AssertionError for mismatched lengths.

    ``point1`` and ``point3`` are fixtures defined outside this block and
    are expected to have different lengths. Both argument orders are
    exercised, each in its own ``pytest.raises`` context: a ``raises``
    block is left as soon as the first exception is raised, so a second
    call placed in the same block would never be executed.
    """
    # NOTE(review): the expected message pattern mentions "empty list", the
    # same pattern used by the empty-input test -- confirm that it matches
    # the message get_distance really raises for a length mismatch.
    for first, second in ((point1, point3), (point3, point1)):
        with pytest.raises(AssertionError, match=r'.* empty list .*'):
            get_distance(first, second)
def test_null_list_input():
    """
    Check that get_distance raises ValueError when either point is empty.

    ``point1`` and ``empty_point`` are fixtures defined outside this block.
    The empty point is passed once as the second and once as the first
    argument. Each call gets its own ``pytest.raises`` context because a
    ``raises`` block is left at the first exception, which would otherwise
    leave the second call unexecuted.
    """
    for first, second in ((point1, empty_point), (empty_point, point1)):
        with pytest.raises(ValueError, match=r'.* empty list .*'):
            get_distance(first, second)
def test_correct_cosine():
    """
    Check that get_distance with metric="cosine" agrees with
    distance.cosine for the same pair of points.
    """
    # The metric must be spelled "cosine": the expected value on the right
    # is the cosine distance, so any other metric name tests the wrong thing.
    actual = get_distance(point1, point2, metric="cosine")
    expected = distance.cosine(point1, point2)
    assert actual == expected
def get_all_distances(point, data, dist="euclidean"):
    """
    Return the distance between a reference point and every row of a dataframe.

    Each row of ``data`` is compared to ``point`` by calling ``get_distance``
    with the requested metric, and the results are collected in row order.

    Parameters
    ----------
    point : list
        list of length k to compare to data
    data : pandas dataframe
        dataframe of size n by k to compare to point
    dist : string
        distance metric to use; one of "euclidean", "cosine" or "manhattan"

    Returns
    -------
    list
        list of length n containing the distance for each row of data

    Raises
    ------
    TypeError
        if ``data`` is not a pandas dataframe or ``point`` is not a list
    ValueError
        if ``len(point)`` differs from the number of columns of ``data``,
        or if ``dist`` is not one of the supported metric names

    Example
    -------
    df = pd.DataFrame({"A" : [1,2,3], "B" : [8,2,4]})
    point = [-2,4]
    get_all_distances(point, df, dist = "euclidean")
    # approximately [5, 4.47, 5]
    """
    # Validate argument types first so the shape checks below are safe.
    if not isinstance(data, pd.DataFrame):
        raise TypeError("the data argument should be a pandas dataframe")

    if not isinstance(point, list):
        raise TypeError("the point argument should be type list")

    # n observations (rows), k features (columns)
    n_rows, n_cols = data.shape

    if len(point) != n_cols:
        raise ValueError(
            "point vector length and number of columns in data should match")

    if dist not in ("euclidean", "cosine", "manhattan"):
        raise ValueError(
            "dist should be one of 'euclidean','cosine' or 'manhattan'")

    # Each row is handed to get_distance as the positional row selected by
    # iloc, exactly one call per observation, preserving row order.
    return [
        get_distance(point, data.iloc[row], metric=dist)
        for row in range(n_rows)
    ]
def test_correct_manhattan():
    """
    Verify that the "manhattan" metric of get_distance matches the value
    returned by get_manhattan_dist for the same two points.
    """
    actual = get_distance(point1, point2, metric="manhattan")
    expected = get_manhattan_dist(point1, point2)
    assert actual == expected
def test_non_numeric_element_output():
    """
    Verify that get_distance raises a ValueError mentioning a non-numeric
    value when it is called with testing='output'.
    """
    # NOTE(review): presumably testing='output' makes get_distance produce a
    # non-numeric result on purpose -- confirm against get_distance.
    expected_message = r'.*non-numeric value.*'
    with pytest.raises(ValueError, match=expected_message):
        get_distance(point1, point2, testing='output')
def test_non_numeric_element_input():
    """
    Verify that get_distance raises a ValueError mentioning a non-numeric
    element when one of the points passed in is ``bad_point``.

    ``bad_point`` is a fixture defined outside this block; it is expected
    to contain at least one non-numeric entry.
    """
    expected_message = r'.*non-numeric element.*'
    with pytest.raises(ValueError, match=expected_message):
        get_distance(point1, bad_point)
def test_incorrect_metric():
    """
    Verify that get_distance raises a KeyError, with a message listing the
    allowed metrics, when given an unsupported metric name.
    """
    unsupported_metric = "error test"
    expected_message = r'.*metric has to be one of'
    with pytest.raises(KeyError, match=expected_message):
        get_distance(point1, point2, metric=unsupported_metric)