def testSessionAsyncExecute(self):
    """Exercise the ``wait=False`` paths for a tensor, a DataFrame and a tuple.

    Every non-blocking call hands back an object on which ``.result()`` is
    invoked; the outcome is compared with the equivalent NumPy / pandas
    computation on the raw data.
    """
    ndarray = np.random.RandomState(0).rand(10, 20)
    tensor = mt.tensor(ndarray)

    # --- tensor reduction -------------------------------------------------
    want_scalar = ndarray.sum()
    self.assertEqual(
        want_scalar, tensor.sum().to_numpy(wait=False).result())
    pending = tensor.sum().execute(wait=False)
    self.assertEqual(want_scalar, pending.result().fetch())

    # --- DataFrame reduction ----------------------------------------------
    frame = pd.DataFrame(ndarray)
    want_series = frame.sum()
    mars_frame = md.DataFrame(tensor)
    pd.testing.assert_series_equal(
        want_series, mars_frame.sum().to_pandas(wait=False).result())
    pending = mars_frame.sum().execute(wait=False)
    pd.testing.assert_series_equal(want_series, pending.result().fetch())

    # --- mixed tuple of both reductions -----------------------------------
    members = [mars_frame.sum(), tensor.sum()]
    outcome = mt.ExecutableTuple(members).to_object(wait=False).result()
    pd.testing.assert_series_equal(frame.sum(), outcome[0])
    self.assertEqual(ndarray.sum(), outcome[1])

    pending = mt.ExecutableTuple(members).execute(wait=False)
    outcome = pending.result().fetch()
    pd.testing.assert_series_equal(frame.sum(), outcome[0])
    self.assertEqual(ndarray.sum(), outcome[1])
def test_make_blobs_n_samples_list(setup):
    """``make_blobs`` given a list of per-blob sizes yields exactly those sizes."""
    blob_sizes = [50, 30, 20]
    generated = make_blobs(n_samples=blob_sizes, n_features=2, random_state=0)
    points, labels = mt.ExecutableTuple(generated).execute().fetch()

    expected_shape = (sum(blob_sizes), 2)
    assert points.shape == expected_shape

    # Count how many points landed in each blob label.
    per_blob = np.bincount(labels, minlength=len(blob_sizes))
    assert all(per_blob == blob_sizes) is True
def testMakeBlobsNSamplesList(self):
    """``make_blobs`` given a list of per-blob sizes yields exactly those sizes.

    The value returned by ``execute()`` is unpacked directly as ``(X, y)``.
    """
    blob_sizes = [50, 30, 20]
    generated = make_blobs(n_samples=blob_sizes, n_features=2, random_state=0)
    points, labels = mt.ExecutableTuple(generated).execute()

    expected_shape = (sum(blob_sizes), 2)
    self.assertEqual(points.shape, expected_shape, "X shape mismatch")

    # Count how many points landed in each blob label.
    per_blob = np.bincount(labels, minlength=len(blob_sizes))
    self.assertTrue(all(per_blob == blob_sizes),
                    "Incorrect number of samples per blob")
def test_make_blobs_n_samples_centers_none(setup):
    """``make_blobs`` with ``centers=None`` accepts list, ndarray and tuple sizes."""
    base = [5, 3, 0]
    # The same per-blob sizes expressed as three different sequence types.
    size_variants = (list(base), np.array(base), tuple(base))

    for blob_sizes in size_variants:
        generated = make_blobs(n_samples=blob_sizes, centers=None,
                               random_state=0)
        points, labels = mt.ExecutableTuple(generated).execute().fetch()

        assert points.shape == (sum(blob_sizes), 2)

        per_blob = np.bincount(labels, minlength=len(blob_sizes))
        assert all(per_blob == blob_sizes) is True
def test_executable_tuple_execute(setup):
    """``ExecutableTuple`` built from a namedtuple keeps its fields and type.

    Checks attribute exposure on the wrapper, the ``AttributeError`` for an
    unknown field, and that the fetched result has the original namedtuple
    type with the raw data in each field.
    """
    ndarray = np.random.RandomState(0).rand(10, 20)
    frame = pd.DataFrame(ndarray)
    tensor = mt.tensor(ndarray)
    mars_frame = md.DataFrame(frame)

    record = test_namedtuple_type(tensor, mars_frame)
    wrapped = mt.ExecutableTuple(record)

    # Field names are visible on the wrapper and resolve to the same objects.
    assert 'a' in dir(wrapped)
    assert wrapped.a is tensor
    assert test_namedtuple_type.__name__ in repr(wrapped)

    # A name that is not a field must raise.
    with pytest.raises(AttributeError):
        getattr(wrapped, 'c')

    fetched = mt.ExecutableTuple(record).execute().fetch()
    assert test_namedtuple_type is type(fetched)
    np.testing.assert_array_equal(ndarray, fetched.a)
    pd.testing.assert_frame_equal(frame, fetched.b)
def testExecutableTupleExecute(self):
    """``ExecutableTuple`` built from a namedtuple keeps its fields and type.

    Checks attribute exposure on the wrapper, the ``AttributeError`` for an
    unknown field, and that the fetched result has the original namedtuple
    type with the raw data in each field.
    """
    ndarray = np.random.RandomState(0).rand(10, 20)
    frame = pd.DataFrame(ndarray)
    tensor = mt.tensor(ndarray)
    mars_frame = md.DataFrame(frame)

    record = test_namedtuple_type(tensor, mars_frame)
    wrapped = mt.ExecutableTuple(record)

    # Field names are visible on the wrapper and resolve to the same objects.
    self.assertIn('a', dir(wrapped))
    self.assertIs(wrapped.a, tensor)
    self.assertIn(test_namedtuple_type.__name__, repr(wrapped))

    # A name that is not a field must raise.
    with self.assertRaises(AttributeError):
        getattr(wrapped, 'c')

    fetched = mt.ExecutableTuple(record).execute().fetch()
    self.assertIs(test_namedtuple_type, type(fetched))
    np.testing.assert_array_equal(ndarray, fetched.a)
    pd.testing.assert_frame_equal(frame, fetched.b)
def testMakeBlobsNSamplesCentersNone(self):
    """``make_blobs`` with ``centers=None`` accepts list, ndarray and tuple sizes.

    The value returned by ``execute()`` is unpacked directly as ``(X, y)``.
    """
    base = [5, 3, 0]
    # The same per-blob sizes expressed as three different sequence types.
    size_variants = (list(base), np.array(base), tuple(base))

    for blob_sizes in size_variants:
        generated = make_blobs(n_samples=blob_sizes, centers=None,
                               random_state=0)
        points, labels = mt.ExecutableTuple(generated).execute()

        expected_shape = (sum(blob_sizes), 2)
        self.assertEqual(points.shape, expected_shape, "X shape mismatch")

        per_blob = np.bincount(labels, minlength=len(blob_sizes))
        self.assertTrue(all(per_blob == blob_sizes),
                        "Incorrect number of samples per blob")
def test_make_blobs_n_samples_list_with_centers(setup):
    """``make_blobs`` with explicit centers, per-blob sizes and per-blob stds.

    Verifies the output shape, the per-blob sample counts, and that the
    spread of each blob around its center matches the requested std to one
    decimal place.
    """
    blob_sizes = [20, 20, 20]
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    generated = make_blobs(n_samples=blob_sizes, centers=blob_centers,
                           cluster_std=blob_stds, random_state=0)
    points, labels = mt.ExecutableTuple(generated).execute().fetch()

    assert points.shape == (sum(blob_sizes), 2)

    per_blob = np.bincount(labels, minlength=len(blob_sizes))
    assert all(per_blob == blob_sizes) is True

    for label, (center, wanted_std) in enumerate(zip(blob_centers, blob_stds)):
        # Spread of the points of this blob around its own center.
        observed_std = (points[labels == label] - center).std()
        assert_almost_equal(observed_std, wanted_std, 1, "Unexpected std")
def test_make_blobs(setup):
    """``make_blobs`` with an integer sample count and explicit centers/stds.

    Verifies the shapes of ``X`` and ``y``, that three distinct labels are
    produced, and that each blob's spread around its center matches the
    requested std to one decimal place.
    """
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    generated = make_blobs(random_state=0, n_samples=50, n_features=2,
                           centers=blob_centers, cluster_std=blob_stds)
    points, labels = mt.ExecutableTuple(generated).execute().fetch()

    assert points.shape == (50, 2)
    assert labels.shape == (50, )
    distinct_labels = np.unique(labels)
    assert distinct_labels.shape == (3, )

    for label, (center, wanted_std) in enumerate(zip(blob_centers, blob_stds)):
        # Spread of the points of this blob around its own center.
        observed_std = (points[labels == label] - center).std()
        assert_almost_equal(observed_std, wanted_std, 1, "Unexpected std")
def testMakeBlobs(self):
    """``make_blobs`` with an integer sample count and explicit centers/stds.

    Verifies the shapes of ``X`` and ``y``, that three distinct labels are
    produced, and that each blob's spread around its center matches the
    requested std to one decimal place. The value returned by ``execute()``
    is unpacked directly as ``(X, y)``.
    """
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    generated = make_blobs(random_state=0, n_samples=50, n_features=2,
                           centers=blob_centers, cluster_std=blob_stds)
    points, labels = mt.ExecutableTuple(generated).execute()

    self.assertEqual(points.shape, (50, 2), "X shape mismatch")
    self.assertEqual(labels.shape, (50, ), "y shape mismatch")
    distinct_labels = np.unique(labels)
    self.assertEqual(distinct_labels.shape, (3, ),
                     "Unexpected number of blobs")

    for label, (center, wanted_std) in enumerate(zip(blob_centers, blob_stds)):
        # Spread of the points of this blob around its own center.
        observed_std = (points[labels == label] - center).std()
        assert_almost_equal(observed_std, wanted_std, 1, "Unexpected std")
def testMakeBlobsNSamplesListWithCenters(self):
    """``make_blobs`` with explicit centers, per-blob sizes and per-blob stds.

    Verifies the output shape, the per-blob sample counts, and that the
    spread of each blob around its center matches the requested std to one
    decimal place. The value returned by ``execute()`` is unpacked directly
    as ``(X, y)``.
    """
    blob_sizes = [20, 20, 20]
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    generated = make_blobs(n_samples=blob_sizes, centers=blob_centers,
                           cluster_std=blob_stds, random_state=0)
    points, labels = mt.ExecutableTuple(generated).execute()

    expected_shape = (sum(blob_sizes), 2)
    self.assertEqual(points.shape, expected_shape, "X shape mismatch")

    per_blob = np.bincount(labels, minlength=len(blob_sizes))
    self.assertTrue(all(per_blob == blob_sizes),
                    "Incorrect number of samples per blob")

    for label, (center, wanted_std) in enumerate(zip(blob_centers, blob_stds)):
        # Spread of the points of this blob around its own center.
        observed_std = (points[labels == label] - center).std()
        assert_almost_equal(observed_std, wanted_std, 1, "Unexpected std")
def testMakeClassificationInformativeFeatures(self):
    """Test the construction of informative features in make_classification

    Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and
    fully-specified `weights`.

    For every parameter combination, and for both values of `hypercube`,
    the generated data is checked for: output shapes, the number of
    distinct sign patterns (one per cluster), clusters per class, number
    of classes, per-class proportions, and whether cluster centroids sit
    on hypercube vertices. Finally, two configurations are checked to
    raise ``ValueError``.
    """
    # Create very separate clusters; check that vertices are unique and
    # correspond to classes
    class_sep = 1e6
    make = partial(make_classification, class_sep=class_sep, n_redundant=0,
                   n_repeated=0, flip_y=0, shift=0, scale=1, shuffle=False)

    for n_informative, weights, n_clusters_per_class in [
        (2, [1], 1),
        (2, [1 / 3] * 3, 1),
        (2, [1 / 4] * 4, 1),
        (2, [1 / 2] * 2, 2),
        (2, [3 / 4, 1 / 4], 2),
        (10, [1 / 3] * 3, 10),
        # Plain int literal: the ``np.int`` alias (which was just the
        # builtin ``int``) was deprecated in NumPy 1.20 and removed in
        # 1.24, so ``np.int(64)`` raises AttributeError on current NumPy.
        (64, [1], 1),
    ]:
        n_classes = len(weights)
        n_clusters = n_classes * n_clusters_per_class
        n_samples = n_clusters * 50

        for hypercube in (False, True):
            generated = make(n_samples=n_samples, n_classes=n_classes,
                             weights=weights, n_features=n_informative,
                             n_informative=n_informative,
                             n_clusters_per_class=n_clusters_per_class,
                             hypercube=hypercube, random_state=0)

            X, y = mt.ExecutableTuple(generated).execute()
            self.assertEqual(X.shape, (n_samples, n_informative))
            self.assertEqual(y.shape, (n_samples, ))

            # Cluster by sign, viewed as strings to allow uniquing.
            # Each row of signs is reinterpreted as one byte string, which
            # gives an (n_samples, 1) view; ravel() makes it 1-D so that
            # the inverse index returned by np.unique is 1-D regardless of
            # the NumPy version (NumPy 2.x shapes it like the input).
            signs = np.sign(X)
            signs = signs.view(
                dtype='|S{0}'.format(signs.strides[0])).ravel()
            unique_signs, cluster_index = np.unique(signs,
                                                    return_inverse=True)

            self.assertEqual(
                len(unique_signs), n_clusters,
                "Wrong number of clusters, or not in distinct "
                "quadrants")

            # Map each class label to the set of clusters it occupies.
            clusters_by_class = defaultdict(set)
            for cluster, cls in zip(cluster_index, y):
                clusters_by_class[cls].add(cluster)
            for clusters in clusters_by_class.values():
                self.assertEqual(len(clusters), n_clusters_per_class,
                                 "Wrong number of clusters per class")
            self.assertEqual(len(clusters_by_class), n_classes,
                             "Wrong number of classes")

            # NOTE(review): the `//` floors the ratio of observed to
            # requested class proportion before comparing with 1; kept
            # as-is -- confirm whether plain `/` was intended.
            assert_array_almost_equal(np.bincount(y) / len(y) // weights,
                                      [1] * n_classes,
                                      err_msg="Wrong number of samples "
                                              "per class")

            # Ensure on vertices of hypercube
            for cluster in range(len(unique_signs)):
                centroid = X[cluster_index == cluster].mean(axis=0)
                if hypercube:
                    assert_array_almost_equal(np.abs(centroid) / class_sep,
                                              np.ones(n_informative),
                                              decimal=5,
                                              err_msg="Clusters are not "
                                                      "centered on hypercube "
                                                      "vertices")
                else:
                    # Without hypercube the same comparison must fail.
                    assert_raises(
                        AssertionError,
                        assert_array_almost_equal,
                        np.abs(centroid) / class_sep,
                        np.ones(n_informative),
                        decimal=5,
                        err_msg="Clusters should not be centered "
                                "on hypercube vertices")

    # These two configurations are expected to be rejected with ValueError.
    assert_raises(ValueError, make, n_features=2, n_informative=2,
                  n_classes=5, n_clusters_per_class=1)
    assert_raises(ValueError, make, n_features=2, n_informative=2,
                  n_classes=3, n_clusters_per_class=2)