Exemple #1
0
def test_unequal_length_in_list():
    """
    Test that an AssertionError is raised when the two lists differ in length.

    Both argument orders are checked. Each call gets its own
    ``pytest.raises`` context: once a call raises, the context exits, so a
    second call placed in the same ``with`` block would never be executed.
    """
    # NOTE(review): the pattern mentions "empty list" although this test is
    # about a length mismatch -- confirm it against get_distance's message.
    for first, second in ((point1, point3), (point3, point1)):
        with pytest.raises(AssertionError, match=r'.* empty list .*'):
            get_distance(first, second)
Exemple #2
0
def test_null_list_input():
    """
    Test that a ValueError is raised when either argument is an empty list.

    The empty list is tried in both argument positions. Each call gets its
    own ``pytest.raises`` context: once a call raises, the context exits, so
    a second call placed in the same ``with`` block would never be executed.
    """
    for first, second in ((point1, empty_point), (empty_point, point1)):
        with pytest.raises(ValueError, match=r'.* empty list .*'):
            get_distance(first, second)
Exemple #3
0
def test_correct_cosine():
    """
    Test that the cosine metric returns the same value as distance.cosine.
    """
    # The metric must be "cosine" for the comparison against distance.cosine
    # to be meaningful (the previous value, "eclidean", was a misspelled and
    # unrelated metric name).
    assert get_distance(point1, point2,
                        metric="cosine") == distance.cosine(point1, point2)
def get_all_distances(point, data, dist="euclidean"):
    """
    Return the distance between a reference point and every row of a dataframe.

    Compares an input reference vector to all rows of an input data frame,
    calling ``get_distance`` with the requested metric once per row.

    Parameters
    ----------
    point : list
        list of length k to compare to data

    data : pandas dataframe
        dataframe of size n by k to compare to point

    dist : string
        string indicating type of distance metric; one of "euclidean",
        "cosine" or "manhattan" (default "euclidean")

    Returns
    -------
    list
        list of length n holding, in row order, the value returned by
        ``get_distance`` for each row of data

    Raises
    ------
    TypeError
        if data is not a pandas dataframe or point is not a list
    ValueError
        if the length of point differs from the number of columns in data,
        or if dist is not one of the supported metric names

    Both error types are subclasses of ``Exception``, so callers that catch
    ``Exception`` keep working.

    Example
    -------
    >>> df = pd.DataFrame({"A" : [1,2,3], "B" : [8,2,4]})
    >>> point = [-2,4]
    >>> get_all_distances(point, df, dist = "euclidean")
    [5, 4.47, 5]

    (output values shown rounded)
    """

    # validate argument types before touching their contents
    if not isinstance(data, pd.DataFrame):
        raise TypeError("the data argument should be a pandas dataframe")

    if not isinstance(point, list):
        raise TypeError("the point argument should be type list")

    # n observations (rows), k features (columns)
    n_rows, n_cols = data.shape

    # the reference point needs one value per column
    if len(point) != n_cols:
        raise ValueError(
            "point vector length and number of columns in data should match")

    # reject unknown metric names up front, before any distance is computed
    if dist not in ("euclidean", "cosine", "manhattan"):
        raise ValueError(
            "dist should be one of 'euclidean','cosine' or 'manhattan'")

    # one distance per row, in row order
    return [
        get_distance(point, data.iloc[row], metric=dist)
        for row in range(n_rows)
    ]
Exemple #5
0
def test_correct_manhattan():
    """
    Check that get_distance with metric="manhattan" agrees with
    get_manhattan_dist for the same pair of points.
    """
    computed = get_distance(point1, point2, metric="manhattan")
    reference = get_manhattan_dist(point1, point2)

    assert computed == reference
Exemple #6
0
def test_non_numeric_element_output():
    """
    Check that get_distance raises a ValueError mentioning a non-numeric
    value when it is called with testing='output'.
    """
    # NOTE(review): testing='output' presumably forces a non-numeric result
    # inside get_distance -- confirm against its implementation.
    expected_error = pytest.raises(ValueError, match=r'.*non-numeric value.*')
    with expected_error:
        get_distance(point1, point2, testing='output')
Exemple #7
0
def test_non_numeric_element_input():
    """
    Check that get_distance raises a ValueError mentioning a non-numeric
    element when it is given bad_point as one of its inputs.
    """
    expected_error = pytest.raises(ValueError, match=r'.*non-numeric element.*')
    with expected_error:
        get_distance(point1, bad_point)
Exemple #8
0
def test_incorrect_metric():
    """
    Check that an unsupported metric name makes get_distance raise a KeyError
    whose message lists the accepted metrics.
    """
    message_pattern = r'.*metric has to be one of'
    with pytest.raises(KeyError, match=message_pattern):
        get_distance(point1, point2, metric="error test")