def test_build_ratings_matrix():
    """Compare the output of ``_build_ratings_matrix`` with the expected values."""
    model = CollaborativeFiltering(7)
    pairs = to_numpy(X_train)
    ratings = to_numpy((y_train))

    Y, users, movies = model._build_ratings_matrix(pairs, ratings)

    # Each of the three returned arrays is checked against its own fixture.
    assert_array_equal(Y, Y_expected)
    assert_array_equal(users, users_expected)
    assert_array_equal(movies, movies_expected)
def test_gradient():
    """Check that ``gradient(x, f)`` agrees with ``grad_f(x)`` at a sample point."""
    point = to_numpy([5, -9, 12])

    estimated = gradient(point, f)
    exact = grad_f(point)

    assert_allclose(estimated, exact)
def _check_X_y(self, X, y):
    """Validate user-item pairs together with their ratings.

    :param X: User-item pairs; validated by ``_check_X``
    :param y: Ratings, expected to hold one value per row of X
    :return: X as returned by ``_check_X`` and y converted with ``to_numpy``
    :raises ValueError: if ``_check_X`` rejects X, or if y does not have
        the shape ``(X.shape[0],)``
    """
    X = self._check_X(X)
    y = to_numpy(y)

    # Checking only the number of dimensions would let a one-dimensional y
    # of the wrong length through, although the message below promises
    # exactly one rating per user-item pair. Compare the whole shape.
    expected_shape = (X.shape[0],)
    if y.shape != expected_shape:
        raise ValueError(
            f"Expected {expected_shape} shape of ratings, but {y.shape} received"
        )

    return X, y
def _check_X(self, X):
    """Convert user-item pairs with ``to_numpy`` and validate their layout.

    :param X: User-item pairs
    :return: The converted X
    :raises ValueError: if the converted X is not two-dimensional or does
        not have exactly 2 columns
    """
    X = to_numpy(X)

    if X.ndim != 2:
        message = "User-item pairs (X) should be a two-dimensional array"
        raise ValueError(message)

    if X.shape[1] != 2:
        message = f"Expected 2 columns in user-item pairs (X), but {X.shape[1]} received"
        raise ValueError(message)

    return X
def test_fit_predict():
    """Check that every predicted cluster equals one of the actual clusters."""
    kmeans = KMeans(3)
    kmeans.fit(X)
    predicted_labels = kmeans.predict(X)

    samples = to_numpy(X)

    # Group the samples by their actual label.
    expected_clusters = []
    for label in np.unique(actual_clusters_labels):
        members = samples[actual_clusters_labels == label]
        expected_clusters.append(members.tolist())

    # Clusters are compared by their members, not by label value, so the
    # label numbers assigned by the model do not have to match.
    for label in np.unique(predicted_labels):
        found_cluster = samples[predicted_labels == label].tolist()
        assert found_cluster in expected_clusters
def test_cost_gradient(features_count, regularization_param, Y):
    """Compare ``_cost_gradient`` with the ``gradient`` helper applied to ``_cost``."""
    Y = to_numpy(Y)
    # Columns of Y are counted as users, rows as items.
    users_count = Y.shape[1]
    items_count = Y.shape[0]

    shapes = ((users_count, features_count), (items_count, features_count))
    params = unroll(glorot_init(shapes))

    model = CollaborativeFiltering(features_count, regularization_param)
    # Set the sizes directly on the model instead of running a full fit.
    model._users_count = users_count
    model._items_count = items_count

    analytical = model._cost_gradient(params, Y)
    numerical = gradient(params, model._cost, (Y, ))

    assert_allclose(analytical, numerical, rtol=1E-4, atol=1E-4)
"""Tests for the KMeans class.""" import pytest import numpy as np from mymllib.clustering import KMeans from mymllib.preprocessing import to_numpy X = [[5, 0], [4, 1], [6, 2], [0, 6], [1, 5], [2, 7], [6, 6], [7, 5], [4, 7]] actual_clusters_labels = [0, 0, 0, 1, 1, 1, 2, 2, 2] @pytest.mark.parametrize("X", [to_numpy(X)]) @pytest.mark.parametrize("clusters_count", [len(X), len(X) + 1]) def test_random_init__not_enough_samples(X, clusters_count): model = KMeans(clusters_count) with pytest.raises(ValueError): model._random_init(X) @pytest.mark.parametrize("X", [to_numpy(X)]) @pytest.mark.parametrize("clusters_count", [1, 2, 3, len(X) - 1]) def test_random_init(X, clusters_count): model = KMeans(clusters_count) cluster_centroids = model._random_init(X) # Check that correct number of centroids was returned assert cluster_centroids.shape[0] == clusters_count # Check that all centroids are unique assert np.unique(cluster_centroids,
def test_minimize(optimizer):
    """Check that the optimizer's result for ``f`` is close to ``min_f``."""
    start = to_numpy([-7, 15, 4])

    found = optimizer.minimize(f, grad_f, start)

    assert_allclose(found, min_f, atol=1E-5)
def grad_f(x):
    """Return ``to_numpy([4*x[0], 2*x[1], 260*x[2]])``."""
    coefficients = (4, 2, 260)
    scaled = [coefficient * x[index]
              for index, coefficient in enumerate(coefficients)]
    return to_numpy(scaled)
"""Tests for optimizers (subclasses of the BaseOptimizer class).""" import pytest from numpy.testing import assert_allclose from mymllib.optimization import GradientDescent, SciPyOptimizer from mymllib.preprocessing import to_numpy def f(x): return 2*x[0]**2 + 0.5*x[1]**4 + 130*x[2]**2 def grad_f(x): return to_numpy([4*x[0], 2*x[1], 260*x[2]]) min_f = to_numpy([0, 0, 0]) # Minimum of f() @pytest.mark.parametrize("optimizer", [GradientDescent(max_iterations=10000), SciPyOptimizer("L-BFGS-B")]) def test_minimize(optimizer): x0 = to_numpy([-7, 15, 4]) x = optimizer.minimize(f, grad_f, x0) assert_allclose(x, min_f, atol=1E-5)
def test_to_numpy__one_arg_passed():
    """Check that ``to_numpy`` called with one argument returns an ndarray."""
    converted = to_numpy(A)
    assert isinstance(converted, ndarray)
def test_one_hot(y, expected_labels, expected_y_one_hot):
    """Check both values returned by ``one_hot`` against the expected ones."""
    encoded = one_hot(to_numpy(y))
    labels, y_one_hot = encoded

    assert_array_equal(labels, expected_labels)
    assert_array_equal(y_one_hot, expected_y_one_hot)
def test_to_numpy__two_args_passed():
    """Check that ``to_numpy`` called with two arguments returns two ndarrays."""
    first, second = to_numpy(A, [1, 2, 3])

    for converted in (first, second):
        assert isinstance(converted, ndarray)