def testHistogramExecution(self):
    """Compare ``histogram`` counts with ``np.histogram`` on seeded data.

    Three groups of checks are run: an explicit ``range``, explicit bin
    edges combined with weights and ``density``, and a set of bin
    specifications (a count, estimator names, an edge list, an edge tensor)
    executed through an executor bound to a ``LocalContext``.
    """
    rng = np.random.RandomState(0)

    values = rng.randint(10, size=(20,))
    t_values = tensor(values, chunk_size=3)
    weight_values = rng.random(20)
    t_weights = tensor(weight_values, chunk_size=4)

    # range provided
    for bounds in ((0, 10), (3, 11), (3, 7)):
        counts = histogram(t_values, range=bounds)[0]
        actual = self.executor.execute_tensor(counts)[0]
        wanted = np.histogram(values, range=bounds)[0]
        np.testing.assert_array_equal(actual, wanted)

    # Weights are passed once as a raw ndarray and once as a tensor; the
    # NumPy reference is always computed from the raw ndarray.
    weighted_cases = [(w, flag)
                      for w in (weight_values, t_weights)
                      for flag in (True, False)]
    for w, use_density in weighted_cases:
        edges = [1, 4, 6, 9]
        counts = histogram(
            t_values, bins=edges, weights=w, density=use_density)[0]
        actual = self.executor.execute_tensor(counts)[0]
        wanted = np.histogram(
            values, bins=edges, weights=weight_values,
            density=use_density)[0]
        np.testing.assert_almost_equal(actual, wanted)

    outer = self

    class MockSession:
        def __init__(self):
            self.executor = outer.executor

    ctx = LocalContext(MockSession())
    ctx_executor = ExecutorForTest('numpy', storage=ctx)
    named_bins = (10, 'stone', 'auto', 'doane', 'fd',
                  'rice', 'scott', 'sqrt', 'sturges')
    with ctx:
        single = rng.randint(10, size=(1,))
        t_single = tensor(single)
        empty = rng.randint(10, size=(0,))
        t_empty = tensor(empty)

        cases = [(t_values, values), (t_single, single),
                 (t_empty, empty), (sort(t_values), values)]
        for t, ref in cases:
            for use_density in (True, False):
                for bins in named_bins:
                    counts = histogram(t, bins=bins, density=use_density)[0]

                    # For non-empty input the test expects execute_tensor
                    # to raise TilesError, while execute_tensors succeeds.
                    if ref.size > 0:
                        with self.assertRaises(TilesError):
                            ctx_executor.execute_tensor(counts)

                    actual = ctx_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        ref, bins=bins, density=use_density)[0]
                    np.testing.assert_array_equal(actual, wanted)

                # Explicit edges, given as a list and as a chunked tensor.
                explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in explicit_bins:
                    counts = histogram(t, bins=bins, density=use_density)[0]
                    actual = ctx_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        ref, bins=[0, 4, 8], density=use_density)[0]
                    np.testing.assert_array_equal(actual, wanted)
def testHistogramBinEdgesExecution(self):
    """Compare ``histogram_bin_edges`` with ``np.histogram_bin_edges``.

    Checks an explicit ``range``, then a set of bin specifications (a
    count, estimator names, an edge list, an edge tensor) executed through
    an executor bound to a ``LocalContext``, and finally the default bins
    on ``np.arange(5)`` including the reported tensor shape.
    """
    rng = np.random.RandomState(0)

    values = rng.randint(10, size=(20,))
    t_values = tensor(values, chunk_size=3)

    # range provided
    for bounds in ((0, 10), (3, 11), (3, 7)):
        edges = histogram_bin_edges(t_values, range=bounds)
        actual = self.executor.execute_tensor(edges)[0]
        wanted = np.histogram_bin_edges(values, range=bounds)
        np.testing.assert_array_equal(actual, wanted)

    outer = self

    class MockSession:
        def __init__(self):
            self.executor = outer.executor

    ctx = LocalContext(MockSession())
    ctx_executor = ExecutorForTest('numpy', storage=ctx)
    named_bins = (10, 'stone', 'auto', 'doane', 'fd',
                  'rice', 'scott', 'sqrt', 'sturges')
    with ctx:
        single = rng.randint(10, size=(1,))
        t_single = tensor(single)
        empty = rng.randint(10, size=(0,))
        t_empty = tensor(empty)

        cases = [(t_values, values), (t_single, single),
                 (t_empty, empty), (sort(t_values), values)]
        for t, ref in cases:
            for bins in named_bins:
                edges = histogram_bin_edges(t, bins=bins)

                # For non-empty input the test expects execute_tensor to
                # raise TilesError, while execute_tensors succeeds.
                if ref.size > 0:
                    with self.assertRaises(TilesError):
                        ctx_executor.execute_tensor(edges)

                actual = ctx_executor.execute_tensors([edges])[0]
                wanted = np.histogram_bin_edges(ref, bins=bins)
                np.testing.assert_array_equal(actual, wanted)

            # Explicit edges, given as a list and as a chunked tensor.
            explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
            for bins in explicit_bins:
                edges = histogram_bin_edges(t, bins=bins)
                actual = ctx_executor.execute_tensors([edges])[0]
                wanted = np.histogram_bin_edges(ref, bins=[0, 4, 8])
                np.testing.assert_array_equal(actual, wanted)

        # Default bins: both the declared shape and the values must match.
        values = np.arange(5)
        t_values = tensor(values, chunk_size=3)
        edges = histogram_bin_edges(t_values)
        actual = ctx_executor.execute_tensors([edges])[0]
        wanted = np.histogram_bin_edges(values)
        self.assertEqual(edges.shape, wanted.shape)
        np.testing.assert_array_equal(actual, wanted)
def test_topk():
    """Check the ``parallel_kind`` chosen for tiled ``topk`` operands.

    Also verifies that an unrecognised ``parallel_kind`` is rejected with
    ``ValueError``.
    """
    source = tensor(np.random.rand(20), chunk_size=10)

    tiled = tile(topk(source, 2))
    assert t_kind(tiled) == 'tree' if False else tiled.op.parallel_kind == 'tree'

    tiled = tile(topk(source, 3))
    assert tiled.op.parallel_kind == 'psrs'

    tiled = tile(topk(sort(source), 3))
    # k is less than 100
    assert tiled.op.parallel_kind == 'tree'

    with pytest.raises(ValueError):
        topk(source, 3, parallel_kind='unknown')
def testTopk(self):
    """Check the ``parallel_kind`` chosen for tiled ``topk`` operands.

    Also verifies that an unrecognised ``parallel_kind`` is rejected with
    ``ValueError``.
    """
    source = tensor(np.random.rand(20), chunk_size=10)

    tiled = topk(source, 2).tiles()
    self.assertEqual(tiled.op.parallel_kind, 'tree')

    tiled = topk(source, 3).tiles()
    self.assertEqual(tiled.op.parallel_kind, 'psrs')

    tiled = topk(sort(source), 3).tiles()
    # k is less than 100
    self.assertEqual(tiled.op.parallel_kind, 'tree')

    with self.assertRaises(ValueError):
        topk(source, 3, parallel_kind='unknown')
def test_histogram_bin_edges_execution(setup):
    """Compare executed ``histogram_bin_edges`` with NumPy's result.

    ``setup`` is accepted as an argument but is not referenced in the body.
    """

    def check(t_edges, wanted):
        # Execute the tensor, fetch its value and compare element-wise.
        np.testing.assert_array_equal(t_edges.execute().fetch(), wanted)

    rng = np.random.RandomState(0)

    values = rng.randint(10, size=(20, ))
    t_values = tensor(values, chunk_size=6)

    # range provided
    for bounds in ((0, 10), (3, 11), (3, 7)):
        check(histogram_bin_edges(t_values, range=bounds),
              np.histogram_bin_edges(values, range=bounds))

    single = rng.randint(10, size=(1, ))
    t_single = tensor(single)
    empty = rng.randint(10, size=(0, ))
    t_empty = tensor(empty)

    named_bins = (10, 'stone', 'auto', 'doane', 'fd',
                  'rice', 'scott', 'sqrt', 'sturges')
    cases = [(t_values, values), (t_single, single),
             (t_empty, empty), (sort(t_values), values)]
    for t, ref in cases:
        for bins in named_bins:
            check(histogram_bin_edges(t, bins=bins),
                  np.histogram_bin_edges(ref, bins=bins))

        # Explicit edges, given as a list and as a chunked tensor.
        explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
        for bins in explicit_bins:
            check(histogram_bin_edges(t, bins=bins),
                  np.histogram_bin_edges(ref, bins=[0, 4, 8]))

    # Default bins: both the declared shape and the values must match.
    values = np.arange(5)
    t_values = tensor(values, chunk_size=3)
    edges = histogram_bin_edges(t_values)
    actual = edges.execute().fetch()
    wanted = np.histogram_bin_edges(values)
    assert edges.shape == wanted.shape
    np.testing.assert_array_equal(actual, wanted)
def test_sort():
    """Check ``sort`` graph construction, tiling and argument validation."""
    # 2-d input chunked as (5, 10): tiling yields two TensorSort chunks fed
    # directly by the data source.
    matrix = tensor(np.random.rand(10, 10), chunk_size=(5, 10))
    sorted_matrix = sort(matrix)
    assert type(sorted_matrix.op).__name__ == 'TensorSort'

    sorted_matrix = tile(sorted_matrix)
    assert len(sorted_matrix.chunks) == 2
    for chunk in sorted_matrix.chunks:
        assert type(chunk.op).__name__ == 'TensorSort'
        assert type(chunk.inputs[0].op).__name__ == 'ArrayDataSource'

    # 1-d input split into several chunks: tiled chunks come from the
    # reduce stage of PSRSShuffle and have an unknown (nan) length.
    vector = tensor(np.random.rand(100), chunk_size=10)
    sorted_vector = sort(vector)
    assert type(sorted_vector.op).__name__ == 'TensorSort'

    sorted_vector = tile(sorted_vector)
    for chunk in sorted_vector.chunks:
        assert type(chunk.op).__name__ == 'PSRSShuffle'
        assert chunk.op.stage == OperandStage.reduce
        assert chunk.shape == (np.nan, )

    # Structured dtype: the order recorded on the op lists both fields.
    record_dtype = [('id', np.int32), ('size', np.int64)]
    records = tensor(np.empty((10, 10), dtype=record_dtype),
                     chunk_size=(10, 5))
    sorted_records = sort(records)
    assert sorted_records.op.order == ['id', 'size']

    with pytest.raises(ValueError):
        sort(records, order=['unknown_field'])

    # Invalid keyword arguments and the exception each must raise.
    invalid_calls = [
        (np.AxisError, {'axis': 1}),
        (ValueError, {'kind': 'non_valid_kind'}),
        (ValueError, {'parallel_kind': 'non_valid_parallel_kind'}),
        (TypeError, {'psrs_kinds': 'non_valid_psrs_kinds'}),
        (ValueError, {'psrs_kinds': ['quicksort'] * 2}),
        (ValueError, {'psrs_kinds': ['non_valid_kind'] * 3}),
        (ValueError, {'psrs_kinds': [None, None, None]}),
        (ValueError, {'psrs_kinds': ['quicksort', 'mergesort', None]}),
    ]
    for expected_error, bad_kwargs in invalid_calls:
        with pytest.raises(expected_error):
            sort(np.random.rand(100), **bad_kwargs)
def testHistogramExecution(self):
    """Compare ``histogram`` counts with ``np.histogram`` on seeded data.

    Covers an explicit ``range``, explicit bin edges combined with weights
    and ``density``, a set of bin specifications run through the executor
    returned by ``self._create_test_context``, and inputs produced by
    boolean-mask indexing.
    """
    rng = np.random.RandomState(0)

    values = rng.randint(10, size=(20, ))
    t_values = tensor(values, chunk_size=3)
    weight_values = rng.random(20)
    t_weights = tensor(weight_values, chunk_size=4)

    # range provided
    for bounds in ((0, 10), (3, 11), (3, 7)):
        counts = histogram(t_values, range=bounds)[0]
        actual = self.executor.execute_tensor(counts)[0]
        wanted = np.histogram(values, range=bounds)[0]
        np.testing.assert_array_equal(actual, wanted)

    # Weights are passed once as a raw ndarray and once as a tensor; the
    # NumPy reference is always computed from the raw ndarray.
    weighted_cases = [(w, flag)
                      for w in (weight_values, t_weights)
                      for flag in (True, False)]
    for w, use_density in weighted_cases:
        edges = [1, 4, 6, 9]
        counts = histogram(
            t_values, bins=edges, weights=w, density=use_density)[0]
        actual = self.executor.execute_tensor(counts)[0]
        wanted = np.histogram(
            values, bins=edges, weights=weight_values,
            density=use_density)[0]
        np.testing.assert_almost_equal(actual, wanted)

    ctx, ctx_executor = self._create_test_context(self.executor)
    named_bins = (10, 'stone', 'auto', 'doane', 'fd',
                  'rice', 'scott', 'sqrt', 'sturges')
    with ctx:
        single = rng.randint(10, size=(1, ))
        t_single = tensor(single)
        empty = rng.randint(10, size=(0, ))
        t_empty = tensor(empty)

        cases = [(t_values, values), (t_single, single),
                 (t_empty, empty), (sort(t_values), values)]
        for t, ref in cases:
            for use_density in (True, False):
                for bins in named_bins:
                    counts = histogram(t, bins=bins, density=use_density)[0]

                    # For non-empty input the test expects execute_tensor
                    # to raise TilesError, while execute_tensors succeeds.
                    if ref.size > 0:
                        with self.assertRaises(TilesError):
                            ctx_executor.execute_tensor(counts)

                    actual = ctx_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        ref, bins=bins, density=use_density)[0]
                    np.testing.assert_array_equal(actual, wanted)

                # Explicit edges, given as a list and as a chunked tensor.
                explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in explicit_bins:
                    counts = histogram(t, bins=bins, density=use_density)[0]
                    actual = ctx_executor.execute_tensors([counts])[0]
                    wanted = np.histogram(
                        ref, bins=[0, 4, 8], density=use_density)[0]
                    np.testing.assert_array_equal(actual, wanted)

        # test unknown shape
        floats = rng.rand(10)
        t_floats = tensor(floats, chunk_size=3)
        t_masked = t_floats[t_floats < 0.9]
        # The whole histogram() result is handed to execute_tensors; the
        # first executed value is the counts array.
        outputs = histogram(t_masked)
        actual = ctx_executor.execute_tensors(outputs)[0]
        wanted = np.histogram(floats[floats < 0.9])[0]
        np.testing.assert_array_equal(actual, wanted)

        ints = np.arange(3, 10)
        t_ints = arange(10, chunk_size=3)
        t_kept = t_ints[t_ints >= 3]
        outputs = histogram(t_kept)
        actual = ctx_executor.execute_tensors(outputs)[0]
        wanted = np.histogram(ints)[0]
        np.testing.assert_array_equal(actual, wanted)
def testSort(self):
    """Check ``sort`` graph construction, tiling and argument validation."""
    # 2-d input chunked as (5, 10): tiling yields two TensorSort chunks fed
    # directly by the data source.
    matrix = tensor(np.random.rand(10, 10), chunk_size=(5, 10))
    sorted_matrix = sort(matrix)
    self.assertEqual(type(sorted_matrix.op).__name__, 'TensorSort')

    sorted_matrix = sorted_matrix.tiles()
    self.assertEqual(len(sorted_matrix.chunks), 2)
    for chunk in sorted_matrix.chunks:
        self.assertEqual(type(chunk.op).__name__, 'TensorSort')
        self.assertEqual(type(chunk.inputs[0].op).__name__,
                         'ArrayDataSource')

    # 1-d input split into several chunks: tiled chunks come from the
    # reduce stage of PSRSShuffle and have an unknown (nan) length.
    vector = tensor(np.random.rand(100), chunk_size=10)
    sorted_vector = sort(vector)
    self.assertEqual(type(sorted_vector.op).__name__, 'TensorSort')

    sorted_vector = sorted_vector.tiles()
    for chunk in sorted_vector.chunks:
        self.assertEqual(type(chunk.op).__name__, 'PSRSShuffle')
        self.assertEqual(chunk.op.stage, OperandStage.reduce)
        self.assertEqual(chunk.shape, (np.nan, ))

    # Structured dtype: the order recorded on the op lists both fields.
    record_dtype = [('id', np.int32), ('size', np.int64)]
    records = tensor(np.empty((10, 10), dtype=record_dtype),
                     chunk_size=(10, 5))
    sorted_records = sort(records)
    self.assertSequenceEqual(sorted_records.op.order, ['id', 'size'])

    with self.assertRaises(ValueError):
        sort(records, order=['unknown_field'])

    # Invalid keyword arguments and the exception each must raise.
    invalid_calls = [
        (np.AxisError, {'axis': 1}),
        (ValueError, {'kind': 'non_valid_kind'}),
        (ValueError, {'parallel_kind': 'non_valid_parallel_kind'}),
        (TypeError, {'psrs_kinds': 'non_valid_psrs_kinds'}),
        (ValueError, {'psrs_kinds': ['quicksort'] * 2}),
        (ValueError, {'psrs_kinds': ['non_valid_kind'] * 3}),
        (ValueError, {'psrs_kinds': [None, None, None]}),
        (ValueError, {'psrs_kinds': ['quicksort', 'mergesort', None]}),
    ]
    for expected_error, bad_kwargs in invalid_calls:
        with self.assertRaises(expected_error):
            sort(np.random.rand(100), **bad_kwargs)
def test_histogram_execution(setup):
    """Compare executed ``histogram`` counts with ``np.histogram``.

    ``setup`` is accepted as an argument but is not referenced in the body.
    """

    def fetch(executable):
        # Run the object and pull its value back.
        return executable.execute().fetch()

    rng = np.random.RandomState(0)

    values = rng.randint(10, size=(20, ))
    t_values = tensor(values, chunk_size=6)
    weight_values = rng.random(20)
    t_weights = tensor(weight_values, chunk_size=8)

    # range provided
    for bounds in ((0, 10), (3, 11), (3, 7)):
        actual = fetch(histogram(t_values, range=bounds)[0])
        wanted = np.histogram(values, range=bounds)[0]
        np.testing.assert_array_equal(actual, wanted)

    # Weights are passed once as a raw ndarray and once as a tensor; the
    # NumPy reference is always computed from the raw ndarray.
    weighted_cases = [(w, flag)
                      for w in (weight_values, t_weights)
                      for flag in (True, False)]
    for w, use_density in weighted_cases:
        edges = [1, 4, 6, 9]
        actual = fetch(histogram(
            t_values, bins=edges, weights=w, density=use_density)[0])
        wanted = np.histogram(
            values, bins=edges, weights=weight_values,
            density=use_density)[0]
        np.testing.assert_almost_equal(actual, wanted)

    single = rng.randint(10, size=(1, ))
    t_single = tensor(single)
    empty = rng.randint(10, size=(0, ))
    t_empty = tensor(empty)

    named_bins = (10, 'stone', 'auto', 'doane', 'fd',
                  'rice', 'scott', 'sqrt', 'sturges')
    cases = [(t_values, values), (t_single, single),
             (t_empty, empty), (sort(t_values), values)]
    for t, ref in cases:
        for use_density in (True, False):
            for bins in named_bins:
                actual = fetch(
                    histogram(t, bins=bins, density=use_density)[0])
                wanted = np.histogram(
                    ref, bins=bins, density=use_density)[0]
                np.testing.assert_array_equal(actual, wanted)

            # Explicit edges, given as a list and as a chunked tensor.
            explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
            for bins in explicit_bins:
                actual = fetch(
                    histogram(t, bins=bins, density=use_density)[0])
                wanted = np.histogram(
                    ref, bins=[0, 4, 8], density=use_density)[0]
                np.testing.assert_array_equal(actual, wanted)

    # test unknown shape
    floats = rng.rand(10)
    t_floats = tensor(floats, chunk_size=6)
    t_masked = t_floats[t_floats < 0.9]
    # The whole histogram() result is executed; item 0 of the fetched
    # value is the counts array.
    actual = fetch(histogram(t_masked))[0]
    wanted = np.histogram(floats[floats < 0.9])[0]
    np.testing.assert_array_equal(actual, wanted)

    ints = np.arange(3, 10)
    t_ints = arange(10, chunk_size=6)
    t_kept = t_ints[t_ints >= 3]
    actual = fetch(histogram(t_kept))[0]
    wanted = np.histogram(ints)[0]
    np.testing.assert_array_equal(actual, wanted)
def testSortExecution(self):
    """Execute ``sort`` on a range of inputs and compare with ``np.sort``.

    Inputs cover 1-d, 2-d and 3-d arrays, structured dtypes with explicit
    field order, flattening via ``axis=None``, explicit ``psrs_kinds`` and
    the in-place ``Tensor.sort`` method.
    """

    def run(t):
        # Execute with chunk concatenation and unwrap the single result.
        return self.executor.execute_tensor(t, concat=True)[0]

    record_dtype = [('id', np.int32), ('size', np.int64)]

    # only 1 chunk when axis = -1
    data = np.random.rand(100, 10)
    t_data = tensor(data, chunk_size=10)
    np.testing.assert_array_equal(run(sort(t_data)), np.sort(data))

    # 1-d chunk
    data = np.random.rand(100)
    t_data = tensor(data, chunk_size=10)
    np.testing.assert_array_equal(run(sort(t_data)), np.sort(data))

    # structured dtype
    data = np.empty(100, dtype=record_dtype)
    data['id'] = np.random.randint(1000, size=100, dtype=np.int32)
    data['size'] = np.random.randint(1000, size=100, dtype=np.int64)
    t_data = tensor(data, chunk_size=10)
    np.testing.assert_array_equal(
        run(sort(t_data, order=['size', 'id'])),
        np.sort(data, order=['size', 'id']))

    # test flatten case
    data = np.random.rand(10, 10)
    t_data = tensor(data, chunk_size=5)
    np.testing.assert_array_equal(
        run(sort(t_data, axis=None)), np.sort(data, axis=None))

    # test multi-dimension
    data = np.random.rand(10, 100)
    t_data = tensor(data, chunk_size=(2, 10))
    np.testing.assert_array_equal(
        run(sort(t_data, psrs_kinds=['quicksort'] * 3)), np.sort(data))

    data = np.random.rand(10, 99)
    t_data = tensor(data, chunk_size=(2, 10))
    np.testing.assert_array_equal(run(sort(t_data)), np.sort(data))

    # test 3-d
    data = np.random.rand(20, 25, 28)
    t_data = tensor(data, chunk_size=(10, 5, 7))
    np.testing.assert_array_equal(run(sort(t_data)), np.sort(data))
    np.testing.assert_array_equal(
        run(sort(t_data, axis=0)), np.sort(data, axis=0))
    np.testing.assert_array_equal(
        run(sort(t_data, axis=1)), np.sort(data, axis=1))

    # test multi-dimension with structured type
    data = np.empty((10, 100), dtype=record_dtype)
    data['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32)
    data['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64)
    t_data = tensor(data, chunk_size=(3, 10))
    np.testing.assert_array_equal(run(sort(t_data)), np.sort(data))
    np.testing.assert_array_equal(
        run(sort(t_data, order=['size', 'id'])),
        np.sort(data, order=['size', 'id']))
    np.testing.assert_array_equal(
        run(sort(t_data, order=['size'])),
        np.sort(data, order=['size']))
    np.testing.assert_array_equal(
        run(sort(t_data, axis=0, order=['size', 'id'])),
        np.sort(data, axis=0, order=['size', 'id']))

    # In-place method: the second sort operates on the result of the
    # first, so the reference chains the two np.sort calls.
    data = np.random.rand(10, 12)
    t_inplace = tensor(data, chunk_size=(5, 4))
    t_inplace.sort(axis=1)
    np.testing.assert_array_equal(run(t_inplace), np.sort(data, axis=1))
    t_inplace.sort(axis=0)
    np.testing.assert_array_equal(
        run(t_inplace), np.sort(np.sort(data, axis=1), axis=0))
def testPartitionExecution(self):
    """Execute ``partition`` and compare with ``np.partition`` at ``kth``.

    Only the elements at the ``kth`` positions are compared (except for
    the first case, which compares whole arrays). Inputs cover 1-d, 2-d and
    3-d arrays, structured dtypes, ``axis=None``, the in-place
    ``Tensor.partition`` method and ``kth`` given as a tensor.
    """

    def run(t):
        # Execute with chunk concatenation and unwrap the single result.
        return self.executor.execute_tensor(t, concat=True)[0]

    def seeded_kth(low, high, count):
        # A fresh RandomState(0) per call keeps each kth reproducible.
        return np.random.RandomState(0).randint(low, high, size=(count, ))

    record_dtype = [('id', np.int32), ('size', np.int64)]

    # only 1 chunk when axis = -1
    data = np.random.rand(100, 10)
    t_data = tensor(data, chunk_size=10)
    got = run(partition(t_data, [1, 8]))
    np.testing.assert_array_equal(got, np.partition(data, [1, 8]))

    # 1-d chunk
    data = np.random.rand(100)
    t_data = tensor(data, chunk_size=10)
    kth = seeded_kth(-100, 100, 10)
    got = run(partition(t_data, kth))
    np.testing.assert_array_equal(got[kth], np.partition(data, kth)[kth])

    # structured dtype (reuses the kth from the 1-d case)
    data = np.empty(100, dtype=record_dtype)
    data['id'] = np.random.randint(1000, size=100, dtype=np.int32)
    data['size'] = np.random.randint(1000, size=100, dtype=np.int64)
    t_data = tensor(data, chunk_size=10)
    got = run(partition(t_data, kth, order=['size', 'id']))
    np.testing.assert_array_equal(
        got[kth], np.partition(data, kth, order=['size', 'id'])[kth])

    # test flatten case (reuses the kth from the 1-d case)
    data = np.random.rand(10, 10)
    t_data = tensor(data, chunk_size=5)
    got = run(partition(t_data, kth, axis=None))
    np.testing.assert_array_equal(
        got[kth], np.partition(data, kth, axis=None)[kth])

    # test multi-dimension
    data = np.random.rand(10, 100)
    t_data = tensor(data, chunk_size=(2, 10))
    kth = seeded_kth(-10, 10, 3)
    got = run(partition(t_data, kth))
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth)[:, kth])

    data = np.random.rand(10, 99)
    t_data = tensor(data, chunk_size=(2, 10))
    got = run(partition(t_data, kth))
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth)[:, kth])

    # test 3-d
    data = np.random.rand(20, 25, 28)
    t_data = tensor(data, chunk_size=(10, 5, 7))
    kth = seeded_kth(-28, 28, 3)
    got = run(partition(t_data, kth))
    np.testing.assert_array_equal(
        got[:, :, kth], np.partition(data, kth)[:, :, kth])

    kth = seeded_kth(-20, 20, 3)
    got = run(partition(t_data, kth, axis=0))
    np.testing.assert_array_equal(
        got[kth], np.partition(data, kth, axis=0)[kth])

    kth = seeded_kth(-25, 25, 3)
    got = run(partition(t_data, kth, axis=1))
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth, axis=1)[:, kth])

    # test multi-dimension with structured type
    data = np.empty((10, 100), dtype=record_dtype)
    data['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32)
    data['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64)
    t_data = tensor(data, chunk_size=(3, 10))
    kth = seeded_kth(-100, 100, 10)
    got = run(partition(t_data, kth))
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth)[:, kth])

    got = run(partition(t_data, kth, order=['size', 'id']))
    np.testing.assert_array_equal(
        got[:, kth],
        np.partition(data, kth, order=['size', 'id'])[:, kth])

    got = run(partition(t_data, kth, order=['size']))
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth, order=['size'])[:, kth])

    kth = seeded_kth(-10, 10, 5)
    got = run(partition(t_data, kth, axis=0, order=['size', 'id']))
    np.testing.assert_array_equal(
        got[kth],
        np.partition(data, kth, axis=0, order=['size', 'id'])[kth])

    # In-place method: the second partition operates on the result of
    # the first, so that earlier result is the NumPy reference input.
    data = np.random.rand(10, 12)
    t_inplace = tensor(data, chunk_size=(5, 4))
    kth = seeded_kth(-12, 12, 2)
    t_inplace.partition(kth, axis=1)
    got = run(t_inplace)
    np.testing.assert_array_equal(
        got[:, kth], np.partition(data, kth, axis=1)[:, kth])

    kth = seeded_kth(-10, 10, 2)
    t_inplace.partition(kth, axis=0)
    previous = got
    got = run(t_inplace)
    np.testing.assert_array_equal(
        got[kth], np.partition(previous, kth, axis=0)[kth])

    # test kth which is tensor
    # The reference is a full sort: at the kth positions a partitioned
    # array must agree with the sorted one.
    data = np.random.rand(10, 12)
    for chunks in ((3, 5), (10, 12)):
        t_data = tensor(data, chunk_size=chunks)
        t_kth = (mt.random.rand(5) * 24 - 12).astype(int)
        t_partitioned = partition(t_data, t_kth)
        t_sorted = sort(t_data)
        got = run(t_partitioned)
        kth_values = run(t_kth)
        sorted_values = run(t_sorted)
        np.testing.assert_array_equal(
            got[:, kth_values], sorted_values[:, kth_values])