def test_svd_errors(self):
    """ Checks that ds.svd raises ValueError for the inputs below """
    # (shape, block size) of the random ds-arrays handed to ds.svd
    rejected_inputs = (
        ((3, 9), (2, 2)),
        ((3, 3), (3, 3)),
    )

    for shape, block_size in rejected_inputs:
        with self.assertRaises(ValueError):
            ds.svd(ds.random_array(shape, block_size))
def test_rechunk_exceptions(self):
    """ Checks that rechunk raises ValueError for the block sizes below """
    for rejected_bsize in ((100, 10), (10, 100)):
        # every case starts from a fresh 50x50 array with 10x10 blocks
        arr = ds.random_array((50, 50), (10, 10))

        with self.assertRaises(ValueError):
            arr.rechunk(rejected_bsize)
def test_kron_regular(self, a_shape, a_bsize, b_shape, b_bsize):
    """ Compares ds.kron (called without an explicit output block size)
    against np.kron on two random ds-arrays """
    left = ds.random_array(a_shape, a_bsize)
    right = ds.random_array(b_shape, b_bsize)

    result = ds.kron(left, right)
    # reference value computed by NumPy on the collected operands
    reference = np.kron(left.collect(), right.collect())

    self.assertTrue(_validate_array(result))
    self.assertTrue(_equal_arrays(result.collect(), reference))
def test_init_random(self):
    """ Fits a GaussianMixture with random initialization and checks that
    it reports more than 5 iterations """
    data = ds.random_array((50, 3), (10, 3), random_state=0)

    settings = {
        'init_params': 'random',
        'n_components': 4,
        'arity': 2,
        'random_state': 170,
    }
    model = GaussianMixture(**settings)
    model.fit(data)

    self.assertGreater(model.n_iter, 5)
def test_rechunk(self, shape, bsize_in, bsize_out):
    """ Rechunks a random ds-array and checks block sizes and contents """
    source = ds.random_array(shape, bsize_in)
    rechunked = source.rechunk(bsize_out)

    # both the regular and the top-left block shape must be the new size
    self.assertEqual(rechunked._reg_shape, bsize_out)
    self.assertEqual(rechunked._top_left_shape, bsize_out)

    self.assertTrue(_validate_array(rechunked))

    # rechunking must not alter the stored values
    self.assertTrue(_equal_arrays(source.collect(), rechunked.collect()))
def test_matmul_error(self):
    """ Tests that the @ operator raises ValueError for the operand pairs
    below

    The operands are built outside the assertRaises blocks so that only
    the multiplication itself can satisfy each assertion; an unexpected
    ValueError while constructing an operand now surfaces as a test error
    instead of being silently accepted.
    """
    # two random 5x3 ds-arrays
    x1 = ds.random_array((5, 3), (5, 3))
    x2 = ds.random_array((5, 3), (5, 3))
    with self.assertRaises(ValueError):
        x1 @ x2

    # 5x3 in a single block against 3x5 in 2x5 blocks
    x1 = ds.random_array((5, 3), (5, 3))
    x2 = ds.random_array((3, 5), (2, 5))
    with self.assertRaises(ValueError):
        x1 @ x2

    # ds-array built from a nested list against one built from a
    # csr_matrix
    x1 = ds.array([[1, 2, 3], [4, 5, 6]], (2, 3))
    x2 = ds.array(sp.csr_matrix([[1, 2], [4, 5], [7, 6]]), (3, 2))
    with self.assertRaises(ValueError):
        x1 @ x2
def test_invalid_indexing(self):
    """ Checks the exception raised for each unsupported index below """
    arr = ds.random_array((5, 5), (1, 1))

    # (expected exception, key passed to arr[...])
    cases = (
        (IndexError, ([3], [4])),
        (IndexError, (7, 4)),
        (IndexError, "sss"),
        # same key as arr[:, 4]
        (NotImplementedError, (slice(None), 4)),
    )

    for expected_error, key in cases:
        with self.assertRaises(expected_error):
            arr[key]
def main():
    """ Measures KMeans.fit on a random ds-array of 300M samples """
    n_samples, n_features = 300000000, 100
    n_chunks, n_clusters = 1536, 500

    # rows per block, rounded up so that n_chunks blocks cover all samples
    rows_per_chunk = int(np.ceil(n_samples / n_chunks))

    data = ds.random_array((n_samples, n_features),
                           (rows_per_chunk, n_features))
    estimator = KMeans(n_clusters=n_clusters, max_iter=5, tol=0, arity=48)

    performance.measure("KMeans", "300M", estimator.fit, data)
def test_random(self):
    """ Checks the block layout of a random ds-array and the effect of
    random_state on its contents """
    seeded = ds.random_array((93, 177), (43, 31), random_state=88)

    self.assertEqual(seeded.shape, seeded.collect().shape)
    self.assertEqual(seeded._n_blocks, (3, 6))
    self.assertEqual(seeded._reg_shape, (43, 31))

    # ((block row, block column), expected block shape)
    expected_block_shapes = (
        ((2, 0), (7, 31)),
        ((2, 5), (7, 22)),
        ((0, 5), (43, 22)),
        ((0, 0), (43, 31)),
    )
    for (i, j), block_shape in expected_block_shapes:
        self.assertEqual(seeded._blocks[i][j].shape, block_shape)

    same_seed = ds.random_array((93, 177), (43, 31), random_state=88)
    other_seed = ds.random_array((93, 177), (43, 31), random_state=666)
    unseeded_a = ds.random_array((193, 77), (21, 51))
    unseeded_b = ds.random_array((193, 77), (21, 51))

    # equal seeds give equal contents
    self.assertTrue(
        np.array_equal(seeded.collect(), same_seed.collect()))
    # different seeds, or no seed at all, give different contents
    self.assertFalse(
        np.array_equal(seeded.collect(), other_seed.collect()))
    self.assertFalse(
        np.array_equal(unseeded_a.collect(), unseeded_b.collect()))
def main():
    """ Measures GaussianMixture.fit on a random ds-array of 100M samples
    """
    n_samples, n_features = 100000000, 100
    n_chunks, n_clusters = 768, 50

    # rows per block, rounded up so that n_chunks blocks cover all samples
    rows_per_chunk = int(np.ceil(n_samples / n_chunks))

    data = ds.random_array((n_samples, n_features),
                           (rows_per_chunk, n_features))
    mixture = GaussianMixture(n_components=n_clusters, max_iter=5, tol=0,
                              init_params="random")

    performance.measure("GMM", "100M", mixture.fit, data)
def test_get_slice_shapes(self):
    """ Checks the shape metadata and the block shapes of a sliced
    ds-array """
    arr = ds.random_array((100, 100), (25, 25))

    # drop the first row and keep only the last column
    n_cols = arr.shape[1]
    sliced = arr[1:, n_cols - 1:n_cols]

    self.assertEqual(sliced._top_left_shape, (24, 1))
    self.assertEqual(sliced._reg_shape, (25, 25))
    self.assertEqual(sliced.shape, (99, 1))

    # fetch the first two blocks of the first block column
    top_left_block = compss_wait_on(sliced._blocks[0][0])
    regular_block = compss_wait_on(sliced._blocks[1][0])

    self.assertEqual(top_left_block.shape, (24, 1))
    self.assertEqual(regular_block.shape, (25, 1))
def test_set_item(self):
    """ Checks single-element assignment and the errors raised by
    invalid assignments """
    arr = ds.random_array((10, 10), (3, 3))

    # ((row, column), value written at that position)
    writes = (((5, 5), -1), ((0, 0), -2), ((9, 9), -3))

    for position, value in writes:
        arr[position] = value

    self.assertTrue(_validate_array(arr))

    collected = arr.collect()
    for (row, col), value in writes:
        self.assertEqual(collected[row][col], value)

    # a list is not accepted as the value of a single element
    with self.assertRaises(ValueError):
        arr[0, 0] = [2, 3, 4]

    # row 10 does not exist in a 10x10 array
    with self.assertRaises(IndexError):
        arr[10, 2] = 3

    # a single integer index is rejected
    with self.assertRaises(IndexError):
        arr[0] = 3
def test_matmul_error(self):
    """ Tests that ds.matmul raises for the operand pairs below

    The operands (including the slicing step) are prepared outside the
    assertRaises blocks so that only the ds.matmul call can satisfy each
    assertion; an exception raised while building or slicing an operand
    now surfaces as a test error instead of making the test pass.
    """
    # first operand is a slice that drops the first row
    x1 = ds.random_array((5, 3), (2, 2))
    x1 = x1[1:]
    x2 = ds.random_array((3, 5), (2, 2))
    with self.assertRaises(NotImplementedError):
        ds.matmul(x1, x2)

    # second operand is a slice that drops the first row
    x1 = ds.random_array((5, 3), (2, 2))
    x2 = ds.random_array((3, 5), (2, 2))
    x2 = x2[1:]
    with self.assertRaises(NotImplementedError):
        ds.matmul(x1, x2)

    # 5x3 (one block) and 3x5 (2x5 blocks) with transpose_a=True
    x1 = ds.random_array((5, 3), (5, 3))
    x2 = ds.random_array((3, 5), (2, 5))
    with self.assertRaises(ValueError):
        ds.matmul(x1, x2, transpose_a=True)
class MathTest(unittest.TestCase):
    """ Tests for ds.kron and ds.svd """

    @parameterized.expand([((21, 33), (10, 15), False),
                           ((5, 10), (8, 1), False),
                           ((17, 13), (1, 9), False),
                           ((6, 1), (12, 23), False),
                           ((1, 22), (25, 16), False),
                           ((1, 12), (1, 3), False),
                           ((14, 1), (4, 1), False),
                           ((10, 1), (1, 19), False),
                           ((1, 30), (12, 1), False)])
    def test_kron(self, shape_a, shape_b, sparse):
        """ Compares ds.kron against np.kron using random input and output
        block sizes """
        np.random.seed()

        a_np = np.random.random(shape_a)
        b_np = np.random.random(shape_b)
        expected = np.kron(a_np, b_np)

        if sparse:
            a_np = sp.csr_matrix(a_np)
            b_np = sp.csr_matrix(b_np)

        # random block size per dimension, between 1 and the dimension
        a_bsize = tuple(np.random.randint(1, dim + 1) for dim in a_np.shape)
        b_bsize = tuple(np.random.randint(1, dim + 1) for dim in b_np.shape)

        a = ds.array(a_np, a_bsize)
        b = ds.array(b_np, b_bsize)

        # random output block size, bounded by the product of the input
        # block sizes
        out_bsize = (np.random.randint(1, (a_bsize[0] * b_bsize[0]) + 1),
                     np.random.randint(1, (a_bsize[1] * b_bsize[1]) + 1))

        computed = ds.kron(a, b, out_bsize)
        self.assertTrue(_validate_array(computed))

        computed = computed.collect(False)

        # expected is a dense ndarray, so a sparse result is converted to
        # a dense one before the comparison
        if a._sparse:
            computed = computed.toarray()

        self.assertTrue(_equal_arrays(expected, computed))

    @parameterized.expand([((15, 13), (3, 6), (9, 6), (3, 2)),
                           ((7, 8), (2, 3), (1, 15), (1, 15))])
    def test_kron_regular(self, a_shape, a_bsize, b_shape, b_bsize):
        """ Compares ds.kron (called without an explicit output block
        size) against np.kron on two random ds-arrays """
        left = ds.random_array(a_shape, a_bsize)
        right = ds.random_array(b_shape, b_bsize)

        result = ds.kron(left, right)
        reference = np.kron(left.collect(), right.collect())

        self.assertTrue(_validate_array(result))
        self.assertTrue(_equal_arrays(result.collect(), reference))

    @staticmethod
    def _collect_svd(decomposition):
        """ Collects the (u, s, v) ds-arrays returned by ds.svd, turning
        s into a diagonal matrix """
        u, s, v = decomposition
        return u.collect(), np.diag(s.collect()), v.collect()

    def _check_svd(self, x_np, u, s, v):
        """ Asserts that u @ s @ v.T is close to x_np and that the
        columns of u and v have unit norm """
        self.assertTrue(np.allclose(x_np, u @ s @ v.T))
        self.assertTrue(
            np.allclose(np.linalg.norm(u, axis=0), np.ones(u.shape[1])))
        self.assertTrue(
            np.allclose(np.linalg.norm(v, axis=0), np.ones(v.shape[1])))

    @parameterized.expand([(ds.array([[1, 0, 0, 0],
                                      [0, 0, 0, 2],
                                      [0, 3, 0, 0],
                                      [2, 0, 0, 0]], (2, 2)),),
                           (ds.random_array((17, 5), (1, 1)),),
                           (ds.random_array((9, 7), (9, 6)),),
                           (ds.random_array((10, 10), (2, 2))[1:, 1:],)])
    def test_svd(self, x):
        """ Checks ds.svd with default arguments, sort=False,
        compute_uv=False and copy=False """
        x_np = x.collect()

        # default arguments
        u, s, v = self._collect_svd(ds.svd(x))
        self._check_svd(x_np, u, s, v)

        # unsorted decomposition
        u, s, v = self._collect_svd(ds.svd(x, sort=False))
        self._check_svd(x_np, u, s, v)

        # singular values only; u and v are reused from the previous
        # (unsorted) decomposition
        s = ds.svd(x, compute_uv=False, sort=False)
        s = np.diag(s.collect())
        self._check_svd(x_np, u, s, v)

        # decomposition with copy=False
        u, s, v = self._collect_svd(ds.svd(x, copy=False))
        self._check_svd(x_np, u, s, v)

    def test_svd_errors(self):
        """ Checks that ds.svd raises ValueError for the inputs below """
        # (shape, block size) of the random ds-arrays handed to ds.svd
        rejected_inputs = (
            ((3, 9), (2, 2)),
            ((3, 3), (3, 3)),
        )

        for shape, block_size in rejected_inputs:
            with self.assertRaises(ValueError):
                ds.svd(ds.random_array(shape, block_size))
def main():
    """ Measures the transpose of a 20000x20000 random ds-array stored in
    100x100 blocks """
    side, block_side = 20000, 100
    matrix = ds.random_array((side, side), (block_side, block_side))

    performance.measure("TR", "20K", matrix.transpose)