Exemple #1
0
def test_correct_output_type():
    """
    Tests that summary outputs a dataframe
    """
    medians, labels = KMedians(data, k)
    df = summary(data, medians, labels)

    assert isinstance(df, pd.DataFrame), 'Output is not a dataframe'
Exemple #2
0
def test_correct_input_type():
    """
    Tests the input types for summary are numpy arrays
    """
    medians, labels = KMedians(data, k)
    df = summary(data, medians, labels)

    assert isinstance(data, np.ndarray), 'Data is not a numpy array'
    assert isinstance(labels, np.ndarray), 'Labels is not a numpy array'
    assert isinstance(medians, np.ndarray), 'Medians is not a numpy array'
Exemple #3
0
def test_number_of_clusters():
    """
    Tests that the algorithm correctly predicts the total number of clusters
    """

    medians, labels = KMedians(data, k)
    df = summary(data, medians, labels)

    assert np.equal(len(df['Cluster Label']),
                    k), 'Number of clusters does not match input k'
Exemple #4
0
def test_number_of_points_in_clusters():
    """
    Tests that the algorithm preserves the total number of data points
    """

    medians, labels = KMedians(data, k)
    df = summary(data, medians, labels)

    assert np.equal(
        df['Number of Points in a Cluster'].sum(),
        data.shape[0]), 'Number of points in table does not match dataset'
Exemple #5
0
def test_toy_data_summary():
    """
    Tests that the algorithm correctly clusters toy example and tablulates data correctly
    """
    np.random.seed(1)
    X = np.array([[1, 2], [4, 5], [6, 2], [9, 8]])
    medians, labels = KMedians(X, 2)
    df = summary(X, medians, labels)

    assert np.equal(df.shape[0], 2)
    assert np.equal(df['Cluster Label'].sum(),
                    1), "Cluster Label doesn't match"
    assert np.equal(df['X Coordinates of Final Medians'].sum(),
                    13), "X Coordinate doesn't match"
    assert np.equal(df['Y Coordinates of Final Medians'].sum(),
                    10), "Y Coordinate doesn't match"
    assert np.equal(df['Number of Points in a Cluster'].sum(),
                    4), "Number of Points in a Cluster doesn't match"
    assert np.equal(round(df['Average Distance within Cluster'].sum(), 2),
                    2.67), "Average Distance within Cluster doesn't match"
    assert np.equal(df['Maxiumum Distance within Cluster'].sum(),
                    3.0), "Maxiumum Distance within Cluster doesn't match"
    assert np.equal(df['Minimum Distance within Cluster'].sum(),
                    2.0), "Minimum Distance within Cluster doesn't match"