def testPercentileExecution(self):
    """Compare ``percentile`` with ``np.percentile`` for ndarray and tensor ``q``."""
    data = np.random.rand(20, 10)
    quantiles = np.random.RandomState(0).randint(100, size=11)
    a = tensor(data, chunk_size=7)

    # q passed as a plain ndarray, executed through the shared executor.
    actual = self.executor.execute_tensor(percentile(a, quantiles), concat=True)[0]
    expected = np.percentile(data, quantiles)
    np.testing.assert_array_equal(actual, expected)

    # q passed as a tensor, executed inside a LocalContext.
    mq = tensor(quantiles)
    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    ctx = LocalContext(MockSession())
    ctx_executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        actual = ctx_executor.execute_tensors([percentile(a, mq)])[0]
        np.testing.assert_array_equal(actual, expected)
def execute_size(t):
    """Mock-execute ``t`` while recording size data for ``TensorTensorDot`` chunks.

    The recordings are written into the ``chunk_sizes``, ``chunk_nbytes``,
    ``chunk_input_sizes`` and ``chunk_input_nbytes`` mappings defined outside
    this function; all four are cleared before the run.
    ``register_default(TensorTensorDot)`` is always called afterwards.
    """
    def _tensordot_size_recorder(ctx, op):
        TensorTensorDot.estimate_size(ctx, op)

        out_chunk = op.outputs[0]
        chunk_key = out_chunk.key
        chunk_sizes[chunk_key] = ctx[chunk_key]
        chunk_nbytes[chunk_key] = out_chunk.nbytes

        # Keyed by the input's op key, so inputs sharing an op count once.
        sizes_by_op = {inp.op.key: ctx[inp.key][0] for inp in op.inputs}
        chunk_input_sizes[chunk_key] = sum(sizes_by_op.values())

        nbytes_by_op = {inp.op.key: inp.nbytes for inp in op.inputs}
        chunk_input_nbytes[chunk_key] = sum(nbytes_by_op.values())

    size_executor = ExecutorForTest(
        sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
    try:
        for record in (chunk_sizes, chunk_nbytes,
                       chunk_input_sizes, chunk_input_nbytes):
            record.clear()
        register(TensorTensorDot, size_estimator=_tensordot_size_recorder)
        size_executor.execute_tensor(t, mock=True)
    finally:
        register_default(TensorTensorDot)
class Test(TestBase):
    """Execution tests for ``imread``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testImreadExecution(self):
        """Write random PNG files and read them back, singly and via a glob."""
        with tempfile.TemporaryDirectory() as tempdir:
            raws = []
            for idx in range(10):
                pixels = np.random.randint(0, 256, 2500, dtype=np.uint8).reshape((50, 50))
                raws.append(pixels)
                Image.fromarray(pixels).save(os.path.join(tempdir, f'random_{idx}.png'))

            # Single image
            single = imread(os.path.join(tempdir, 'random_0.png'))
            res = self.executor.execute_tensor(single, concat=True)[0]
            np.testing.assert_array_equal(res, raws[0])

            # Glob pattern: once with default chunking, then twice with
            # chunk_size=4. Both sides are sorted along axis 0 before comparing.
            pattern = os.path.join(tempdir, 'random_*.png')
            for kwargs in ({}, {'chunk_size': 4}, {'chunk_size': 4}):
                stacked = imread(pattern, **kwargs)
                res = self.executor.execute_tensor(stacked, concat=True)[0]
                np.testing.assert_array_equal(np.sort(res, axis=0), np.sort(raws, axis=0))
class TestIndexReduction(TestBase):
    """Execution tests for reductions on ``md.Index``."""

    def setUp(self):
        # Chain to the base fixture so anything TestBase.setUp prepares
        # is available to the tests in this class as well.
        super().setUp()
        self.executor = ExecutorForTest()

    def testIndexReduction(self):
        """Check min/max/all/any on an Index against pandas.

        Each reduction is run for two integer indexes, first with the
        default chunking and then with ``chunk_size=10``.
        """
        rs = np.random.RandomState(0)
        data = pd.Index(rs.randint(0, 5, (100, )))
        data2 = pd.Index(rs.randint(1, 6, (100, )))

        for method in ['min', 'max', 'all', 'any']:
            for raw in (data, data2):
                for kwargs in ({}, {'chunk_size': 10}):
                    idx = md.Index(raw, **kwargs)
                    result = self.executor.execute_dataframe(
                        getattr(idx, method)(), concat=True)[0]
                    self.assertEqual(result, getattr(raw, method)())
class Test(unittest.TestCase):
    """Execution tests for ``pairwise_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testPairwiseDistancesExecution(self):
        """Compare results with scikit-learn's pairwise distances for several metrics."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x = mt.tensor(raw_x, chunk_size=11)
        y = mt.tensor(raw_y, chunk_size=12)

        d = pairwise_distances(x, y)
        actual = run(d)
        np.testing.assert_almost_equal(actual, sk_pairwise_distances(raw_x, raw_y))

        # test precomputed: a negative entry must be rejected at execution
        negative = d.copy()
        negative[0, 0] = -1
        negative = pairwise_distances(negative, y, metric='precomputed')
        with self.assertRaises(ValueError):
            _ = run(negative)

        # test cdist
        weight = np.random.rand(5)
        d = pairwise_distances(x, y, metric='wminkowski', p=3, w=weight)
        actual = run(d)
        wanted = sk_pairwise_distances(raw_x, raw_y, metric='wminkowski',
                                       p=3, w=weight)
        np.testing.assert_almost_equal(actual, wanted)

        # test pdist
        d = pairwise_distances(x, metric='hamming')
        actual = run(d)
        np.testing.assert_almost_equal(
            actual, sk_pairwise_distances(raw_x, metric='hamming'))

        # test function metric
        m = lambda u, v: np.sqrt(((u - v)**2).sum())
        d = pairwise_distances(x, y, metric=m)
        actual = run(d)
        np.testing.assert_almost_equal(
            actual, sk_pairwise_distances(raw_x, raw_y, metric=m))

        assert_warns(DataConversionWarning, pairwise_distances, x, y, metric='jaccard')

        with self.assertRaises(ValueError):
            _ = pairwise_distances(x, y, metric='unknown')
def testRandintExecution(self):
    """Check the mock size estimate and the value range of a chunked ``randint`` tensor."""
    mock_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
    arr = tensor.random.randint(0, 2, size=(10, 30), chunk_size=3)

    # The first item of every mock result entry must add up to nbytes.
    estimates = mock_executor.execute_tensor(arr, mock=True)
    estimated_total = sum(entry[0] for entry in estimates)
    self.assertEqual(arr.nbytes, estimated_total)

    values = self.executor.execute_tensor(arr, concat=True)[0]
    self.assertEqual(values.shape, (10, 30))
    self.assertTrue(np.all(values >= 0))
    self.assertTrue(np.all(values < 2))
def setUp(self) -> None:
    """Create an executor whose storage is a ``LocalContext`` and enter that context."""
    test_case = self

    class MockSession:
        @property
        def executor(self):
            # Resolved lazily, so it sees the executor assigned below.
            return test_case.executor

    ctx = LocalContext(MockSession())
    self.ctx = ctx
    self.executor = ExecutorForTest('numpy', storage=ctx)
    # The context is entered here; no matching exit happens in this method.
    ctx.__enter__()
def testSparseRandintExecution(self):
    """Check size estimate, sparsity, shape and values of a sparse ``randint`` tensor."""
    mock_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
    arr = tensor.random.randint(1, 2, size=(30, 50), density=.1,
                                chunk_size=10, dtype='f4')

    # The mock estimate is expected to be the dense size scaled by the density.
    estimates = mock_executor.execute_tensor(arr, mock=True)
    estimated_total = sum(entry[0] for entry in estimates)
    self.assertAlmostEqual(arr.nbytes * 0.1, estimated_total)

    values = self.executor.execute_tensor(arr, concat=True)[0]
    self.assertTrue(issparse(values))
    self.assertEqual(values.shape, (30, 50))
    self.assertTrue(np.all(values.data >= 1))
    self.assertTrue(np.all(values.data < 2))

    filled = (values >= 1).toarray().sum()
    self.assertAlmostEqual(filled, 30 * 50 * .1, delta=20)
class Test(unittest.TestCase):
    """Execution tests for the ``gammaln`` and ``erf`` tensor functions."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _execute(self, t):
        """Run ``t`` on the test executor and return the concatenated result."""
        return self.executor.execute_tensor(t, concat=True)[0]

    def _check_against_scipy(self, mars_func, scipy_func):
        """Compare ``mars_func`` with ``scipy_func`` on a dense and a sparse input."""
        dense = np.random.rand(10, 8, 6)
        actual = self._execute(mars_func(tensor(dense, chunk_size=3)))
        np.testing.assert_array_equal(actual, scipy_func(dense))

        # test sparse
        sparse = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        actual = self._execute(mars_func(tensor(sparse, chunk_size=3)))

        # The reference applies the function to the stored values only.
        data = scipy_func(sparse.data)
        wanted = sps.csr_matrix((data, sparse.indices, sparse.indptr), sparse.shape)
        np.testing.assert_array_equal(actual.toarray(), wanted.toarray())

    def testGammalnExecution(self):
        self._check_against_scipy(gammaln, scipy_gammaln)

    def testErfExecution(self):
        self._check_against_scipy(erf, scipy_erf)
def setUp(self):
    """Register the Mars backend and swap a test executor into a new default session."""
    register_mars_backend()

    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor
def testInputTileable(self):
    """Spawn functions that take a tensor or a DataFrame as an argument."""
    def f(t, x):
        return (t * x).sum().to_numpy()

    def f2(input_df):
        bonus = input_df.iloc[:, 0].fetch().sum()
        return input_df.sum().to_pandas() + bonus

    raw = np.random.RandomState(0).rand(5, 4)

    column_sums = mt.tensor(raw, chunk_size=3).sum(axis=0)
    spawned = spawn(f, args=(column_sums, 3))

    sess = new_session()
    sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

    actual = spawned.execute(session=sess).fetch(session=sess)
    self.assertAlmostEqual(actual, (raw.sum(axis=0) * 3).sum())

    # DataFrame inputs: a chunked frame and its shuffled counterpart,
    # both executed before being handed to the spawned function.
    df1 = md.DataFrame(raw, chunk_size=3)
    df1.execute(session=sess)
    df2 = shuffle(df1)
    df2.execute(session=sess)

    wanted = pd.DataFrame(raw).sum() + raw[:, 0].sum()
    for frame in (df1, df2):
        spawned = spawn(f2, args=(frame, ))
        actual = spawned.execute(session=sess).fetch(session=sess)
        pd.testing.assert_series_equal(actual, wanted)
class Test(unittest.TestCase):
    """Tests for ``haversine_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testHaversineDistancesOp(self):
        """Invalid inputs are rejected when the operand is constructed."""
        # shape[1] != 2 for the single input
        with self.assertRaises(ValueError):
            haversine_distances(mt.random.rand(10, 3))

        # shape[1] != 2 for the second input
        with self.assertRaises(ValueError):
            haversine_distances(mt.random.rand(10, 2), mt.random.rand(11, 3))

        # cannot support sparse
        with self.assertRaises(TypeError):
            haversine_distances(
                mt.random.randint(10, size=(10, 2), density=0.5))

    def testHaversineDistancesExecution(self):
        """Compare with scikit-learn for one and many chunks, with both ``_use_sklearn`` values."""
        raw_x = np.random.rand(30, 2)
        raw_y = np.random.rand(21, 2)

        def check(distance, use_sklearn, wanted_args):
            distance.op._use_sklearn = use_sklearn
            actual = self.executor.execute_tensor(distance, concat=True)[0]
            wanted = sk_haversine_distances(*wanted_args)
            np.testing.assert_array_equal(actual, wanted)

        # one chunk
        x1 = mt.tensor(raw_x, chunk_size=30)
        y1 = mt.tensor(raw_y, chunk_size=30)

        # multiple chunks
        x2 = mt.tensor(raw_x, chunk_size=(11, 1))
        y2 = mt.tensor(raw_y, chunk_size=(17, 1))

        for x, y in [(x1, y1), (x2, y2)]:
            for use_sklearn in [True, False]:
                check(haversine_distances(x, y), use_sklearn, (raw_x, raw_y))

                # test x is y
                check(haversine_distances(x), use_sklearn, (raw_x, raw_x))
def setUp(self):
    """Load the iris data and swap a test executor into a new default session."""
    self.iris = mt.tensor(datasets.load_iris().data)
    # solver_list does not include arpack
    self.solver_list = ['full', 'randomized', 'auto']

    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor
class TestCustomAggregate(TestBase):
    """Execution tests for ``agg`` with the ``MockReduction1``/``MockReduction2`` objects."""

    def setUp(self):
        # Chain to the base fixture so anything TestBase.setUp prepares
        # is available to the tests in this class as well.
        super().setUp()
        self.executor = ExecutorForTest()

    def _execute(self, tileable):
        """Run ``tileable`` on the test executor and return the concatenated result."""
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    def testDataFrameAggregate(self):
        """DataFrame aggregation must match pandas, with and without chunking."""
        data = pd.DataFrame(np.random.rand(30, 20))

        df = md.DataFrame(data)
        for reduction_cls in (MockReduction1, MockReduction2):
            result = df.agg(reduction_cls())
            pd.testing.assert_series_equal(
                self._execute(result), data.agg(reduction_cls()))

        # The chunked aggregation is built and executed twice.
        df = md.DataFrame(data, chunk_size=5)
        for _ in range(2):
            result = df.agg(MockReduction2())
            pd.testing.assert_series_equal(
                self._execute(result), data.agg(MockReduction2()))

    def testSeriesAggregate(self):
        """Series aggregation must match pandas, with and without chunking."""
        data = pd.Series(np.random.rand(20))

        s = md.Series(data)
        for reduction_cls in (MockReduction1, MockReduction2):
            result = s.agg(reduction_cls())
            self.assertEqual(
                self._execute(result), data.agg(reduction_cls()))

        # The chunked aggregation is built and executed twice; it is
        # compared approximately rather than exactly.
        s = md.Series(data, chunk_size=5)
        for _ in range(2):
            result = s.agg(MockReduction2())
            self.assertAlmostEqual(
                self._execute(result), data.agg(MockReduction2()))
class Test(unittest.TestCase):
    """Execution tests for ``entropy``."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testEntropyExecution(self):
        """Compare ``entropy`` with the SciPy reference for several argument forms."""
        def check(r, wanted):
            actual = self.executor.execute_tensor(r, concat=True)[0]
            np.testing.assert_array_almost_equal(actual, wanted)

        rs = np.random.RandomState(0)
        a = rs.rand(10)
        t1 = tensor(a, chunk_size=4)

        # pk only
        check(entropy(t1), sp_entropy(a))

        # pk, qk and a scalar base; run twice, each time with a fresh qk
        for _ in range(2):
            b = rs.rand(10)
            base = 3.1
            t2 = tensor(b, chunk_size=4)
            check(entropy(t1, t2, base), sp_entropy(a, b, base))

        # base given as a tensor expression
        check(entropy(t1, t2, t1.sum()), sp_entropy(a, b, a.sum()))

        # mismatching lengths
        with self.assertRaises(ValueError):
            entropy(t1, t2[:7])
def testSeriesQuantileExecution(self):
    """Compare ``Series.quantile`` with pandas for scalar, list and tensor ``q``."""
    def run(r):
        return self.executor.execute_dataframe(r, concat=True)[0]

    raw = pd.Series(np.random.rand(10), name='a')
    a = Series(raw, chunk_size=3)

    # q = 0.5, scalar
    actual = run(a.quantile())
    self.assertEqual(actual, raw.quantile())

    # q is a list
    actual = run(a.quantile([0.3, 0.7]))
    pd.testing.assert_series_equal(actual, raw.quantile([0.3, 0.7]))

    # test interpolation
    actual = run(a.quantile([0.3, 0.7], interpolation='midpoint'))
    pd.testing.assert_series_equal(
        actual, raw.quantile([0.3, 0.7], interpolation='midpoint'))

    test_case = self

    class MockSession:
        def __init__(self):
            self.executor = test_case.executor

    ctx = LocalContext(MockSession())
    ctx_executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        q = tensor([0.3, 0.7])

        # q is a tensor
        actual = ctx_executor.execute_dataframes([a.quantile(q)])[0]
        pd.testing.assert_series_equal(actual, raw.quantile([0.3, 0.7]))
class Test(TestBase):
    """Execution tests for ``DataFrame.to_csv``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testToCSVExecution(self):
        """Write a chunked DataFrame to one file and to a file per chunk, then read back."""
        columns = {
            'col1': np.random.rand(100),
            'col2': np.random.choice(['a', 'b', 'c'], (100, )),
            'col3': np.arange(100)
        }
        raw = pd.DataFrame(columns,
                           index=pd.RangeIndex(100, 0, -1, name='index'))
        df = DataFrame(raw, chunk_size=33)

        with tempfile.TemporaryDirectory() as base_path:
            # test one file
            path = os.path.join(base_path, 'out.csv')
            self.executor.execute_dataframe(df.to_csv(path))

            written = pd.read_csv(path, dtype=raw.dtypes.to_dict())
            written.set_index('index', inplace=True)
            pd.testing.assert_frame_equal(written, raw)

            # test multi files
            path = os.path.join(base_path, 'out-*.csv')
            self.executor.execute_dataframe(df.to_csv(path))

            # Four output files are read back.
            parts = []
            for i in range(4):
                part_path = os.path.join(base_path, 'out-{}.csv'.format(i))
                parts.append(pd.read_csv(part_path, dtype=raw.dtypes.to_dict()))

            written = pd.concat(parts, axis=0)
            written.set_index('index', inplace=True)
            pd.testing.assert_frame_equal(written, raw)
            pd.testing.assert_frame_equal(parts[1].set_index('index'), raw.iloc[33:66])
def setUp(self):
    """Build seeded random ``X``/``y`` tensors and swap a test executor into a new session."""
    n_rows, n_columns, chunk_size = 1000, 10, 20

    rs = mt.random.RandomState(0)
    self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
    self.y = rs.rand(n_rows, chunk_size=chunk_size)

    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor
class TestUnary(TestBase):
    """Execution tests for unary DataFrame operations."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testAbs(self):
        """``abs`` on a chunked DataFrame must match ``pandas.DataFrame.abs``."""
        raw = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        chunked = from_pandas(raw, chunk_size=5)

        actual = self.executor.execute_dataframe(abs(chunked), concat=True)[0]
        wanted = raw.abs()
        pd.testing.assert_frame_equal(wanted, actual)
def testHistogramBinEdgesExecution(self):
    """Compare ``histogram_bin_edges`` with ``np.histogram_bin_edges``.

    Covers an explicit ``range``, bin counts and named bin estimators,
    and explicit bin edges given as a list or as a tensor.
    """
    rs = np.random.RandomState(0)
    raw = rs.randint(10, size=(20,))
    a = tensor(raw, chunk_size=3)

    # range provided
    for range_ in [(0, 10), (3, 11), (3, 7)]:
        bin_edges = histogram_bin_edges(a, range=range_)
        result = self.executor.execute_tensor(bin_edges)[0]
        expected = np.histogram_bin_edges(raw, range=range_)
        np.testing.assert_array_equal(result, expected)

    this = self

    class MockSession:
        def __init__(self):
            self.executor = this.executor

    ctx = LocalContext(MockSession())
    executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        # Inputs with one element and with no elements, next to the
        # 20-element tensor and its sorted version.
        raw2 = rs.randint(10, size=(1,))
        b = tensor(raw2)
        raw3 = rs.randint(10, size=(0,))
        c = tensor(raw3)
        for t, r in [(a, raw), (b, raw2), (c, raw3), (sort(a), raw)]:
            test_bins = [10, 'stone', 'auto', 'doane', 'fd',
                         'rice', 'scott', 'sqrt', 'sturges']
            for bins in test_bins:
                bin_edges = histogram_bin_edges(t, bins=bins)

                if r.size > 0:
                    # For non-empty input, execute_tensor is expected to
                    # raise TilesError; execute_tensors below must succeed.
                    with self.assertRaises(TilesError):
                        executor.execute_tensor(bin_edges)

                result = executor.execute_tensors([bin_edges])[0]
                expected = np.histogram_bin_edges(r, bins=bins)
                np.testing.assert_array_equal(result, expected)

            # Explicit bin edges, as a list and as a chunked tensor.
            test_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
            for bins in test_bins:
                bin_edges = histogram_bin_edges(t, bins=bins)
                result = executor.execute_tensors([bin_edges])[0]
                expected = np.histogram_bin_edges(r, bins=[0, 4, 8])
                np.testing.assert_array_equal(result, expected)

        # Default arguments: the declared shape must match NumPy's result too.
        raw = np.arange(5)
        a = tensor(raw, chunk_size=3)
        bin_edges = histogram_bin_edges(a)
        result = executor.execute_tensors([bin_edges])[0]
        expected = np.histogram_bin_edges(raw)
        self.assertEqual(bin_edges.shape, expected.shape)
        np.testing.assert_array_equal(result, expected)
def testHistogramExecution(self):
    """Compare the first output of ``histogram`` with that of ``np.histogram``.

    Covers an explicit ``range``, weights given as an ndarray or a tensor,
    ``density`` on and off, bin counts and named bin estimators, and
    explicit bin edges given as a list or as a tensor.
    """
    rs = np.random.RandomState(0)
    raw = rs.randint(10, size=(20,))
    a = tensor(raw, chunk_size=3)
    raw_weights = rs.random(20)
    weights = tensor(raw_weights, chunk_size=4)

    # range provided
    for range_ in [(0, 10), (3, 11), (3, 7)]:
        bin_edges = histogram(a, range=range_)[0]
        result = self.executor.execute_tensor(bin_edges)[0]
        expected = np.histogram(raw, range=range_)[0]
        np.testing.assert_array_equal(result, expected)

    # Weights as an ndarray and as a tensor; the reference always uses
    # the raw ndarray weights.
    for wt in (raw_weights, weights):
        for density in (True, False):
            bins = [1, 4, 6, 9]
            bin_edges = histogram(a, bins=bins, weights=wt, density=density)[0]
            result = self.executor.execute_tensor(bin_edges)[0]
            expected = np.histogram(
                raw, bins=bins, weights=raw_weights, density=density)[0]
            np.testing.assert_almost_equal(result, expected)

    this = self

    class MockSession:
        def __init__(self):
            self.executor = this.executor

    ctx = LocalContext(MockSession())
    executor = ExecutorForTest('numpy', storage=ctx)
    with ctx:
        # Inputs with one element and with no elements, next to the
        # 20-element tensor and its sorted version.
        raw2 = rs.randint(10, size=(1,))
        b = tensor(raw2)
        raw3 = rs.randint(10, size=(0,))
        c = tensor(raw3)
        for t, r in [(a, raw), (b, raw2), (c, raw3), (sort(a), raw)]:
            for density in (True, False):
                test_bins = [10, 'stone', 'auto', 'doane', 'fd',
                             'rice', 'scott', 'sqrt', 'sturges']
                for bins in test_bins:
                    hist = histogram(t, bins=bins, density=density)[0]

                    if r.size > 0:
                        # For non-empty input, execute_tensor is expected to
                        # raise TilesError; execute_tensors below must succeed.
                        with self.assertRaises(TilesError):
                            executor.execute_tensor(hist)

                    result = executor.execute_tensors([hist])[0]
                    expected = np.histogram(r, bins=bins, density=density)[0]
                    np.testing.assert_array_equal(result, expected)

                # Explicit bin edges, as a list and as a chunked tensor.
                test_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in test_bins:
                    hist = histogram(t, bins=bins, density=density)[0]
                    result = executor.execute_tensors([hist])[0]
                    expected = np.histogram(r, bins=[0, 4, 8], density=density)[0]
                    np.testing.assert_array_equal(result, expected)
class Test(unittest.TestCase):
    """Execution tests for ``cosine_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testCosineDistancesExecution(self):
        """Compare with scikit-learn for dense and sparse inputs at two chunk sizes."""
        def check(d, wanted):
            actual = self.executor.execute_tensor(d, concat=True)[0]
            np.testing.assert_almost_equal(np.asarray(actual), wanted)

        raw_dense_x = np.random.rand(25, 10)
        raw_dense_y = np.random.rand(17, 10)

        raw_sparse_x = sps.random(25, 10, density=0.5, format='csr', random_state=0)
        raw_sparse_y = sps.random(17, 10, density=0.4, format='csr', random_state=1)

        cases = [(raw_dense_x, raw_dense_y), (raw_sparse_x, raw_sparse_y)]
        for raw_x, raw_y in cases:
            for chunk_size in (25, 6):
                x = mt.tensor(raw_x, chunk_size=chunk_size)
                y = mt.tensor(raw_y, chunk_size=chunk_size)

                # two inputs
                check(cosine_distances(x, y), sk_cosine_distances(raw_x, raw_y))

                # single input
                check(cosine_distances(x), sk_cosine_distances(raw_x))
class Test(unittest.TestCase):
    """Tests for ``NDArrayIndexesHandler.aggregate_result``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testAggregateResult(self):
        """Aggregate per-chunk results for a slice combined with a fancy index."""
        rs = np.random.RandomState(0)
        raw = rs.rand(10, 10)
        t = tensor(raw, chunk_size=6)

        slc = slice(None, None, 3)

        # test no reorder
        fancy_index = np.array([3, 6, 7])
        indexes = [slc, fancy_index]
        result = t[indexes].tiles()

        handler = NDArrayIndexesHandler()
        context = handler.handle(result.op, return_context=True)
        self.assertGreater(context.op.outputs[0].chunk_shape[-1], 1)
        # Execute chunk by chunk and pair each result with its chunk index.
        chunk_results = self.executor.execute_tensor(result)
        chunk_results = \
            [(c.index, r) for c, r in zip(get_tiled(result).chunks, chunk_results)]
        expected = self.executor.execute_tensor(result, concat=True)[0]
        res = handler.aggregate_result(context, chunk_results)
        np.testing.assert_array_equal(res, expected)

        # test fancy index that requires reordering
        fancy_index = np.array([6, 7, 3])
        indexes = [slc, fancy_index]
        test = t[indexes].tiles()
        context = handler.handle(test.op, return_context=True)
        self.assertEqual(context.op.outputs[0].chunk_shape[-1], 1)
        # NOTE(review): ``chunk_results`` still holds the per-chunk results of
        # the first index ([3, 6, 7]); they are not recomputed for ``test``.
        # Presumably intentional, with the reordering done by the handler;
        # TODO confirm.
        res = handler.aggregate_result(context, chunk_results)
        expected = self.executor.execute_tensor(test, concat=True)[0]
        np.testing.assert_array_equal(res, expected)
def setUp(self) -> None:
    """Swap a test executor into a new session and list the estimator configurations."""
    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor

    # (estimator class, keyword arguments) pairs: one per kernel variant.
    rbf_params = {'kernel': 'rbf'}
    knn_params = {'kernel': 'knn', 'n_neighbors': 2}
    callable_params = {'kernel': lambda x, y: rbf_kernel(x, y, gamma=20)}
    self.estimators = [
        (LabelPropagation, rbf_params),
        (LabelPropagation, knn_params),
        (LabelPropagation, callable_params),
    ]
def testInputTileable(self):
    """Spawn a function that takes a tensor as an argument and check its result."""
    def f(t, x):
        return (t * x).sum().to_numpy()

    raw = np.random.RandomState(0).rand(5, 4)

    column_sums = mt.tensor(raw, chunk_size=3).sum(axis=0)
    spawned = spawn(f, args=(column_sums, 3))

    sess = new_session()
    sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

    actual = spawned.execute(session=sess).fetch(session=sess)
    wanted = (raw.sum(axis=0) * 3).sum()
    self.assertAlmostEqual(actual, wanted)
def setUp(self):
    """Build dense, DataFrame and sparse fixtures and swap a test executor into a new session."""
    n_rows, n_columns, chunk_size = 1000, 10, 20

    rs = mt.random.RandomState(0)
    self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
    self.y = rs.rand(n_rows, chunk_size=chunk_size)
    self.X_df = md.DataFrame(self.X)

    # One randomly chosen column per row is set to NaN, and NaN is then
    # passed as the ``missing`` value of ``tosparse``.
    x_sparse = np.random.rand(n_rows, n_columns)
    nan_columns = np.random.randint(n_columns, size=n_rows)
    x_sparse[np.arange(n_rows), nan_columns] = np.nan
    dense_tensor = mt.tensor(x_sparse, chunk_size=chunk_size)
    self.X_sparse = dense_tensor.tosparse(missing=np.nan)

    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor
def setUp(self) -> None:
    """Swap a test executor into a new session and build the shared input fixtures."""
    self.session = new_session().as_default()
    # Remember the executor that was installed before it is replaced below.
    self._old_executor = self.session._sess._executor

    test_executor = ExecutorForTest('numpy', storage=self.session._sess._context)
    self.executor = test_executor
    self.session._sess._executor = test_executor

    rng = mt.random.RandomState(0)
    n_features = 30
    n_samples = 1000
    self.n_features = n_features
    self.n_samples = n_samples

    # Per-feature offsets and scales applied to normally distributed data.
    offsets = rng.uniform(-1, 1, size=n_features)
    scales = rng.uniform(1, 10, size=n_features)
    X_2d = rng.randn(n_samples, n_features) * scales + offsets
    self.X_2d = X_2d

    # First row and first column as 2-d tensors, plus nested-list copies.
    X_1row = X_2d[0, :].reshape(1, n_features)
    self.X_1row = X_1row
    X_1col = X_2d[:, 0].reshape(n_samples, 1)
    self.X_1col = X_1col
    self.X_list_1row = X_1row.to_numpy().tolist()
    self.X_list_1col = X_1col.to_numpy().tolist()

    self.iris = mt.tensor(load_iris().data)
class TestGPUReduction(TestBase):
    """Execution tests for reductions on DataFrames and Series moved with ``to_gpu``."""

    def setUp(self):
        # Chain to the base fixture so anything TestBase.setUp prepares
        # is available to the tests in this class as well.
        super().setUp()
        self.executor = ExecutorForTest()

    def _execute(self, tileable):
        """Run ``tileable`` on the test executor and return the concatenated result."""
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    def testGPUExecution(self):
        """Compare sum/kurt/agg/unique on ``to_gpu`` data with pandas."""
        df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
        df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

        res = self._execute(df.sum())
        pd.testing.assert_series_equal(res.to_pandas(), df_raw.sum())

        res = self._execute(df.kurt())
        pd.testing.assert_series_equal(res.to_pandas(), df_raw.kurt())

        res = self._execute(df.agg(['sum', 'var']))
        pd.testing.assert_frame_equal(res.to_pandas(), df_raw.agg(['sum', 'var']))

        s_raw = pd.Series(np.random.rand(30))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        res = self._execute(s.sum())
        self.assertAlmostEqual(res, s_raw.sum())

        res = self._execute(s.kurt())
        self.assertAlmostEqual(res, s_raw.kurt())

        res = self._execute(s.agg(['sum', 'var']))
        pd.testing.assert_series_equal(res.to_pandas(), s_raw.agg(['sum', 'var']))

        s_raw = pd.Series(
            np.random.randint(0, 3, size=(30, )) * np.random.randint(0, 5, size=(30, )))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        res = self._execute(s.unique())
        # ndarray.sort() sorts in place and returns None, so comparing its
        # return values would compare None with None and always pass.
        # np.sort returns sorted copies, so the unique values really are checked.
        np.testing.assert_array_equal(
            np.sort(cp.asnumpy(res)), np.sort(s_raw.unique()))
def testUnknownShapeInputs(self):
    """Spawn a function on a boolean-masked tensor and check that it sees concrete shapes."""
    def f(t, x):
        # Every dimension must be known (not NaN) inside the spawned function.
        assert all(not np.isnan(s) for s in t.shape)
        return (t * x).sum().to_numpy(check_nsplits=False)

    raw = np.random.RandomState(0).rand(5, 4)

    t1 = mt.tensor(raw, chunk_size=3)
    masked = t1[t1 > 0]
    spawned = spawn(f, args=(masked, 3))

    sess = new_session()
    sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

    actual = spawned.execute(session=sess).fetch(session=sess)
    wanted = (raw[raw > 0] * 3).sum()
    self.assertAlmostEqual(actual, wanted)
def setUp(self):
    """Build a small sparse matrix fixture and swap a test executor into a new session.

    Sets ``self.X`` (CSR matrix), ``self.Xdense`` (its dense form),
    ``self.n_samples``, ``self.n_features``, ``self.session`` and
    ``self.executor``; the executor that was installed before is kept in
    ``self._old_executor``.
    """
    # Make an X that looks somewhat like a small tf-idf matrix.
    # XXX newer versions of SciPy >0.16 have scipy.sparse.rand for this.
    shape = 60, 55
    n_samples, n_features = shape
    rng = check_random_state(42)
    # np.prod replaces the np.product alias, which NumPy 2.0 removed.
    X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
    # Negative draws are clipped to zero, so they are not stored.
    X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
    X.data[:] = 1 + np.log(X.data)

    self.X = X
    # toarray() replaces the ``.A`` shorthand, which recent SciPy releases
    # deprecate and remove for sparse matrices.
    self.Xdense = X.toarray()
    self.n_samples = n_samples
    self.n_features = n_features

    self.session = new_session().as_default()
    self._old_executor = self.session._sess._executor
    self.executor = self.session._sess._executor = \
        ExecutorForTest('numpy', storage=self.session._sess._context)