コード例 #1
0
    def testSessionAsyncExecute(self):
        """Check the ``wait=False`` variants of the execution entry points.

        For a tensor reduction, a DataFrame reduction and an
        ``ExecutableTuple`` holding both, the conversion method
        (``to_numpy`` / ``to_pandas`` / ``to_object``) and ``execute`` are
        each called with ``wait=False``.  Whatever ``.result()`` yields
        (followed by ``.fetch()`` in the ``execute`` case) is compared with
        the value computed locally by NumPy / pandas.
        """
        ndarray = np.random.RandomState(0).rand(10, 20)
        tensor = mt.tensor(ndarray)
        scalar_total = ndarray.sum()

        # Tensor reduction.
        pending = tensor.sum().to_numpy(wait=False)
        self.assertEqual(scalar_total, pending.result())
        pending = tensor.sum().execute(wait=False)
        self.assertEqual(scalar_total, pending.result().fetch())

        # DataFrame reduction over a frame built from the same tensor.
        frame = pd.DataFrame(ndarray)
        column_totals = frame.sum()
        mars_frame = md.DataFrame(tensor)
        pending = mars_frame.sum().to_pandas(wait=False)
        pd.testing.assert_series_equal(column_totals, pending.result())
        pending = mars_frame.sum().execute(wait=False)
        pd.testing.assert_series_equal(column_totals,
                                       pending.result().fetch())

        # Both reductions bundled into one ExecutableTuple.
        reductions = [mars_frame.sum(), tensor.sum()]
        pending = mt.ExecutableTuple(reductions).to_object(wait=False)
        bundle = pending.result()
        pd.testing.assert_series_equal(column_totals, bundle[0])
        self.assertEqual(scalar_total, bundle[1])

        pending = mt.ExecutableTuple(reductions).execute(wait=False)
        bundle = pending.result().fetch()
        pd.testing.assert_series_equal(column_totals, bundle[0])
        self.assertEqual(scalar_total, bundle[1])
コード例 #2
0
def test_make_blobs_n_samples_list(setup):
    """Check ``make_blobs`` when ``n_samples`` is a list of per-blob sizes.

    The executed result must have one row per requested sample and the
    label counts must match the requested sizes blob by blob.
    """
    blob_sizes = [50, 30, 20]
    features, labels = make_blobs(n_samples=blob_sizes,
                                  n_features=2,
                                  random_state=0)
    X, y = mt.ExecutableTuple((features, labels)).execute().fetch()

    total_rows = sum(blob_sizes)
    assert X.shape == (total_rows, 2)

    label_counts = np.bincount(y, minlength=len(blob_sizes))
    assert all(label_counts == blob_sizes)
コード例 #3
0
    def testMakeBlobsNSamplesList(self):
        """Check ``make_blobs`` when ``n_samples`` is a list of blob sizes."""
        blob_sizes = [50, 30, 20]
        features, labels = make_blobs(n_samples=blob_sizes,
                                      n_features=2,
                                      random_state=0)
        X, y = mt.ExecutableTuple((features, labels)).execute()

        # One row per requested sample, two feature columns.
        expected_shape = (sum(blob_sizes), 2)
        self.assertEqual(X.shape, expected_shape, "X shape mismatch")

        # Each label must occur exactly as often as its blob size.
        label_counts = np.bincount(y, minlength=len(blob_sizes))
        self.assertTrue(all(label_counts == blob_sizes),
                        "Incorrect number of samples per blob")
コード例 #4
0
def test_make_blobs_n_samples_centers_none(setup):
    """Check ``make_blobs`` with sequence ``n_samples`` and ``centers=None``.

    The same per-blob sizes are supplied as a list, a NumPy array and a
    tuple; each form must give the same row count and label histogram.
    """
    size_variants = ([5, 3, 0], np.array([5, 3, 0]), (5, 3, 0))
    for n_samples in size_variants:
        features, labels = make_blobs(n_samples=n_samples,
                                      centers=None,
                                      random_state=0)
        X, y = mt.ExecutableTuple((features, labels)).execute().fetch()

        assert X.shape == (sum(n_samples), 2)

        label_counts = np.bincount(y, minlength=len(n_samples))
        assert all(label_counts == n_samples)
コード例 #5
0
ファイル: test_session.py プロジェクト: fyrestone/mars
def test_executable_tuple_execute(setup):
    """Check that ``ExecutableTuple`` wraps a namedtuple transparently.

    Field names must be exposed as attributes (and listed by ``dir``), the
    namedtuple's type name must appear in ``repr``, unknown attributes must
    raise ``AttributeError``, and the executed-and-fetched result must be
    an instance of the same namedtuple type holding the raw values.
    """
    ndarray = np.random.RandomState(0).rand(10, 20)
    frame = pd.DataFrame(ndarray)

    tensor = mt.tensor(ndarray)
    mars_frame = md.DataFrame(frame)

    record = test_namedtuple_type(tensor, mars_frame)
    wrapped = mt.ExecutableTuple(record)

    # Attribute-style access mirrors the namedtuple's fields.
    assert 'a' in dir(wrapped)
    assert wrapped.a is tensor
    assert test_namedtuple_type.__name__ in repr(wrapped)
    with pytest.raises(AttributeError):
        getattr(wrapped, 'c')

    # Execution keeps the namedtuple type and yields the original data.
    fetched = mt.ExecutableTuple(record).execute().fetch()
    assert test_namedtuple_type is type(fetched)

    np.testing.assert_array_equal(ndarray, fetched.a)
    pd.testing.assert_frame_equal(frame, fetched.b)
コード例 #6
0
    def testExecutableTupleExecute(self):
        """Check that ``ExecutableTuple`` wraps a namedtuple transparently.

        Field names must be exposed as attributes (and listed by ``dir``),
        the namedtuple's type name must appear in ``repr``, unknown
        attributes must raise ``AttributeError``, and the executed-and-
        fetched result must be the same namedtuple type with the raw values.
        """
        ndarray = np.random.RandomState(0).rand(10, 20)
        frame = pd.DataFrame(ndarray)

        tensor = mt.tensor(ndarray)
        mars_frame = md.DataFrame(frame)

        record = test_namedtuple_type(tensor, mars_frame)
        wrapped = mt.ExecutableTuple(record)

        # Attribute-style access mirrors the namedtuple's fields.
        self.assertIn('a', dir(wrapped))
        self.assertIs(wrapped.a, tensor)
        self.assertIn(test_namedtuple_type.__name__, repr(wrapped))
        with self.assertRaises(AttributeError):
            getattr(wrapped, 'c')

        # Execution keeps the namedtuple type and yields the original data.
        fetched = mt.ExecutableTuple(record).execute().fetch()
        self.assertIs(test_namedtuple_type, type(fetched))

        np.testing.assert_array_equal(ndarray, fetched.a)
        pd.testing.assert_frame_equal(frame, fetched.b)
コード例 #7
0
    def testMakeBlobsNSamplesCentersNone(self):
        """Check ``make_blobs`` with sequence ``n_samples``, ``centers=None``.

        The same per-blob sizes are supplied as a list, a NumPy array and a
        tuple; each form must give the same row count and label histogram.
        """
        size_variants = ([5, 3, 0], np.array([5, 3, 0]), (5, 3, 0))
        for n_samples in size_variants:
            features, labels = make_blobs(n_samples=n_samples,
                                          centers=None,
                                          random_state=0)
            X, y = mt.ExecutableTuple((features, labels)).execute()

            expected_shape = (sum(n_samples), 2)
            self.assertEqual(X.shape, expected_shape, "X shape mismatch")

            label_counts = np.bincount(y, minlength=len(n_samples))
            self.assertTrue(all(label_counts == n_samples),
                            "Incorrect number of samples per blob")
コード例 #8
0
def test_make_blobs_n_samples_list_with_centers(setup):
    """Check ``make_blobs`` with list ``n_samples`` and explicit centers.

    Besides the shape and per-label counts, the spread of every blob
    around its requested center must match the requested standard
    deviation to one decimal place.
    """
    blob_sizes = [20, 20, 20]
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    features, labels = make_blobs(n_samples=blob_sizes,
                                  centers=blob_centers,
                                  cluster_std=blob_stds,
                                  random_state=0)
    X, y = mt.ExecutableTuple((features, labels)).execute().fetch()

    assert X.shape == (sum(blob_sizes), 2)

    label_counts = np.bincount(y, minlength=len(blob_sizes))
    assert all(label_counts == blob_sizes)

    for label, (center, spread) in enumerate(zip(blob_centers, blob_stds)):
        offsets = X[y == label] - center
        assert_almost_equal(offsets.std(), spread, 1, "Unexpected std")
コード例 #9
0
def test_make_blobs(setup):
    """Check ``make_blobs`` with an integer ``n_samples`` and three centers.

    Verifies the output shapes, that exactly three distinct labels occur,
    and that each blob's spread around its center matches the requested
    standard deviation to one decimal place.
    """
    blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    blob_stds = np.array([0.05, 0.2, 0.4])

    features, labels = make_blobs(random_state=0,
                                  n_samples=50,
                                  n_features=2,
                                  centers=blob_centers,
                                  cluster_std=blob_stds)
    X, y = mt.ExecutableTuple((features, labels)).execute().fetch()

    assert X.shape == (50, 2)
    assert y.shape == (50, )
    distinct_labels = np.unique(y)
    assert distinct_labels.shape == (3, )

    for label, (center, spread) in enumerate(zip(blob_centers, blob_stds)):
        offsets = X[y == label] - center
        assert_almost_equal(offsets.std(), spread, 1, "Unexpected std")
コード例 #10
0
 def testMakeBlobs(self):
     """Check ``make_blobs`` with an integer ``n_samples`` and three centers.

     Verifies the output shapes, that exactly three distinct labels occur,
     and that each blob's spread around its center matches the requested
     standard deviation to one decimal place.
     """
     blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
     blob_stds = np.array([0.05, 0.2, 0.4])

     features, labels = make_blobs(random_state=0,
                                   n_samples=50,
                                   n_features=2,
                                   centers=blob_centers,
                                   cluster_std=blob_stds)
     X, y = mt.ExecutableTuple((features, labels)).execute()

     self.assertEqual(X.shape, (50, 2), "X shape mismatch")
     self.assertEqual(y.shape, (50, ), "y shape mismatch")
     distinct_labels = np.unique(y)
     self.assertEqual(distinct_labels.shape, (3, ),
                      "Unexpected number of blobs")

     for label, (center, spread) in enumerate(zip(blob_centers, blob_stds)):
         offsets = X[y == label] - center
         assert_almost_equal(offsets.std(), spread, 1, "Unexpected std")
コード例 #11
0
    def testMakeBlobsNSamplesListWithCenters(self):
        """Check ``make_blobs`` with list ``n_samples`` and explicit centers.

        Besides the shape and per-label counts, the spread of every blob
        around its requested center must match the requested standard
        deviation to one decimal place.
        """
        blob_sizes = [20, 20, 20]
        blob_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
        blob_stds = np.array([0.05, 0.2, 0.4])

        features, labels = make_blobs(n_samples=blob_sizes,
                                      centers=blob_centers,
                                      cluster_std=blob_stds,
                                      random_state=0)
        X, y = mt.ExecutableTuple((features, labels)).execute()

        expected_shape = (sum(blob_sizes), 2)
        self.assertEqual(X.shape, expected_shape, "X shape mismatch")

        label_counts = np.bincount(y, minlength=len(blob_sizes))
        self.assertTrue(all(label_counts == blob_sizes),
                        "Incorrect number of samples per blob")

        for label, (center, spread) in enumerate(
                zip(blob_centers, blob_stds)):
            offsets = X[y == label] - center
            assert_almost_equal(offsets.std(), spread, 1, "Unexpected std")
コード例 #12
0
    def testMakeClassificationInformativeFeatures(self):
        """Test the construction of informative features in make_classification

        Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and
        fully-specified `weights`.

        For every parameter combination the generated data is executed and
        checked for: output shapes, number of distinct sign patterns
        (one per cluster), clusters per class, number of classes, class
        proportions, and whether cluster centroids sit on hypercube
        vertices (only expected when ``hypercube=True``).  Finally, two
        parameter combinations are expected to raise ``ValueError``.
        """
        # Create very separate clusters; check that vertices are unique and
        # correspond to classes
        class_sep = 1e6
        make = partial(make_classification,
                       class_sep=class_sep,
                       n_redundant=0,
                       n_repeated=0,
                       flip_y=0,
                       shift=0,
                       scale=1,
                       shuffle=False)

        # ``np.int`` was only an alias of the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so a plain Python
        # integer is used for the last case.
        for n_informative, weights, n_clusters_per_class in [
            (2, [1], 1), (2, [1 / 3] * 3, 1), (2, [1 / 4] * 4, 1),
            (2, [1 / 2] * 2, 2), (2, [3 / 4, 1 / 4], 2), (10, [1 / 3] * 3, 10),
            (64, [1], 1)
        ]:
            n_classes = len(weights)
            n_clusters = n_classes * n_clusters_per_class
            n_samples = n_clusters * 50

            for hypercube in (False, True):
                generated = make(n_samples=n_samples,
                                 n_classes=n_classes,
                                 weights=weights,
                                 n_features=n_informative,
                                 n_informative=n_informative,
                                 n_clusters_per_class=n_clusters_per_class,
                                 hypercube=hypercube,
                                 random_state=0)

                X, y = mt.ExecutableTuple(generated).execute()
                self.assertEqual(X.shape, (n_samples, n_informative))
                self.assertEqual(y.shape, (n_samples, ))

                # Cluster by sign, viewed as strings to allow uniquing:
                # each row of ``signs`` is reinterpreted as one byte-string
                # whose length is the row stride in bytes.
                signs = np.sign(X)
                signs = signs.view(dtype='|S{0}'.format(signs.strides[0]))
                unique_signs, cluster_index = np.unique(signs,
                                                        return_inverse=True)

                self.assertEqual(
                    len(unique_signs), n_clusters,
                    "Wrong number of clusters, or not in distinct "
                    "quadrants")

                # Group the cluster ids seen for every class label.
                clusters_by_class = defaultdict(set)
                for cluster, cls in zip(cluster_index, y):
                    clusters_by_class[cls].add(cluster)
                for clusters in clusters_by_class.values():
                    self.assertEqual(len(clusters), n_clusters_per_class,
                                     "Wrong number of clusters per class")
                self.assertEqual(len(clusters_by_class), n_classes,
                                 "Wrong number of classes")

                # NOTE(review): the ratio below uses floor division (``//``),
                # so any class proportion in [1x, 2x) of its weight floors
                # to 1 and passes - confirm whether true division was meant.
                assert_array_almost_equal(np.bincount(y) / len(y) // weights,
                                          [1] * n_classes,
                                          err_msg="Wrong number of samples "
                                          "per class")

                # Ensure on vertices of hypercube
                for cluster in range(len(unique_signs)):
                    centroid = X[cluster_index == cluster].mean(axis=0)
                    if hypercube:
                        assert_array_almost_equal(np.abs(centroid) / class_sep,
                                                  np.ones(n_informative),
                                                  decimal=5,
                                                  err_msg="Clusters are not "
                                                  "centered on hypercube "
                                                  "vertices")
                    else:
                        # Without ``hypercube`` the same comparison is
                        # expected to fail, i.e. raise AssertionError.
                        assert_raises(
                            AssertionError,
                            assert_array_almost_equal,
                            np.abs(centroid) / class_sep,
                            np.ones(n_informative),
                            decimal=5,
                            err_msg="Clusters should not be centered "
                            "on hypercube vertices")

        # These class / cluster counts with only two informative features
        # are expected to be rejected with ValueError.
        assert_raises(ValueError,
                      make,
                      n_features=2,
                      n_informative=2,
                      n_classes=5,
                      n_clusters_per_class=1)
        assert_raises(ValueError,
                      make,
                      n_features=2,
                      n_informative=2,
                      n_classes=3,
                      n_clusters_per_class=2)