def test_arrange_sparse(self):
    """ Checks that _arrange_samples gives matching results when the same
    file is loaded as a sparse and as a dense array. """
    path = "tests/files/libsvm/2"

    sparse, _ = ds.load_svmlight_file(path, (10, 300), 780, True)
    dense, _ = ds.load_svmlight_file(path, (10, 200), 780, False)

    arranged_d, sort_d, _ = _arrange_samples(dense, 3, [128, 184])
    arranged_sp, sort_sp, _ = _arrange_samples(sparse, 3, [128, 184])

    # synchronize the results before inspecting them
    arranged_sp = compss_wait_on(arranged_sp)
    arranged_d = compss_wait_on(arranged_d)
    sort_d = compss_wait_on(sort_d)
    sort_sp = compss_wait_on(sort_sp)

    # same number of regions, and each keeps the storage type of its input
    self.assertEqual(len(arranged_sp), len(arranged_d))
    self.assertFalse(issparse(arranged_d[0]))
    self.assertTrue(issparse(arranged_sp[0]))

    # the sorting information must be identical once flattened
    flat_sp = np.concatenate(np.concatenate(sort_sp).flatten())
    flat_d = np.concatenate(np.concatenate(sort_d).flatten())
    self.assertTrue(np.array_equal(flat_sp, flat_d))

    # lengths were asserted equal above, so pairing the regions is safe
    for region_sp, region_d in zip(arranged_sp, arranged_d):
        self.assertTrue(np.array_equal(region_sp.toarray(), region_d))
def test_arrange_indices(self):
    """ Checks, with toy data, that the sorting returned by
    _arrange_samples can be used to recover the original sample order
    from the arranged samples. """
    data = np.array([[1, 1], [8, 8], [2, 5], [1, 7], [4, 4], [5, 9],
                     [4, 0], [8, 1], [7, 4]])
    x = ds.array(data, block_size=(3, 2))

    arranged, sorting, _ = _arrange_samples(x, n_regions=3)

    arranged = np.vstack(compss_wait_on(arranged))
    sorting = np.asarray(compss_wait_on(sorting))

    indices = np.empty(x.shape[0], dtype=int)
    position = 0

    # Walk the sorting column by column (second axis outermost) and record,
    # for every non-empty entry, where its samples ended up in `arranged`.
    for col in range(sorting.shape[1]):
        for row in range(sorting.shape[0]):
            local_idx = sorting[row][col][0]

            if local_idx.size > 0:
                # offset by 3 per row: the array was built with 3-row blocks
                indices[local_idx + 3 * row] = position
                position += 1

    indices = np.squeeze(indices)

    self.assertTrue(np.array_equal(arranged[indices], x.collect()))
def test_rearrange(self):
    """ Checks that the blocks returned by _rearrange_labels, once
    concatenated, match the data that was originally arranged. """
    original = np.array([[7], [1], [5], [2], [0], [6]])
    x = ds.array(original, block_size=(3, 1))

    arranged, sorting, _ = _arrange_samples(x, n_regions=3)
    rearranged = _rearrange_labels(arranged, sorting, 2)
    blocks = compss_wait_on(rearranged)

    self.assertEqual(len(blocks), 2)

    flattened = np.concatenate(blocks).flatten()
    self.assertTrue(np.array_equal(flattened, x.collect()))
def test_arrange_dimensions(self):
    """ Checks the number of samples per region when _arrange_samples is
    restricted to a subset of the dimensions. """
    data = np.array([[0, 1, 9], [8, 8, 2], [2, 5, 4], [1, 7, 6],
                     [4, 4, 2], [5, 9, 0], [4, 0, 1], [9, 1, 7],
                     [7, 4, 3]])
    x = ds.array(data, block_size=(3, 2))

    # one dimension: three regions holding three samples each
    arranged, _, _ = _arrange_samples(x, n_regions=3, dimensions=[0])
    arranged = compss_wait_on(arranged)

    for region in (0, 1, 2):
        self.assertEqual(arranged[region].shape[0], 3)
    self.assertEqual(len(arranged), 3)

    # dimensions 0 and 1: nine regions, only some of them are checked
    arranged, _, _ = _arrange_samples(x, n_regions=3, dimensions=[0, 1])
    arranged = compss_wait_on(arranged)

    for region in (0, 1, 2, 4, 5):
        self.assertEqual(arranged[region].shape[0], 1)
    self.assertEqual(len(arranged), 9)

    # dimensions 1 and 2: nine regions, every one is checked
    arranged, _, _ = _arrange_samples(x, n_regions=3, dimensions=[1, 2])
    arranged = compss_wait_on(arranged)

    expected_sizes = (1, 0, 2, 1, 2, 0, 2, 0, 1)
    for region, size in enumerate(expected_sizes):
        self.assertEqual(arranged[region].shape[0], size)
    self.assertEqual(len(arranged), 9)
def test_arrange_same_min_max(self):
    """ Tests arrange when one of the features only takes one value.

    The second feature is 0 for every sample. The test checks that nine
    regions are produced and that regions 2, 5 and 8 hold 4, 1 and 1
    samples respectively.
    """
    x = ds.array(np.array([[1, 0], [8, 0], [2, 0], [2, 0], [3, 0],
                           [5, 0]]), block_size=(3, 2))

    arranged, _, _ = _arrange_samples(x, n_regions=3)
    arranged = compss_wait_on(arranged)

    self.assertEqual(len(arranged), 9)

    # assertEqual, not assertTrue: assertTrue(value, n) treats `n` as the
    # failure message and only checks that `value` is truthy, so the
    # expected counts were never actually compared.
    self.assertEqual(arranged[2].shape[0], 4)
    self.assertEqual(arranged[5].shape[0], 1)
    self.assertEqual(arranged[8].shape[0], 1)
def test_arrange(self):
    """ Checks the output of _arrange_samples against the expected
    ordering for a small toy dataset. """
    data = np.array([[1, 1], [8, 8], [2, 5], [1, 7], [4, 4], [5, 9],
                     [4, 0], [8, 1], [7, 4]])
    x = ds.array(data, block_size=(3, 2))

    arranged, _, _ = _arrange_samples(x, n_regions=3)
    arranged = compss_wait_on(arranged)

    self.assertEqual(len(arranged), 9)

    # expected samples after stacking all regions in order
    expected = np.array([[1, 1], [2, 5], [1, 7], [4, 0], [4, 4], [5, 9],
                         [8, 1], [7, 4], [8, 8]])
    stacked = np.vstack(arranged)

    self.assertTrue(np.array_equal(stacked, expected))