Example #1
    def test_randperm(self):
        state = torch.random.get_rng_state()

        # results
        a = ht.random.randperm(10, dtype=ht.int32)
        b = ht.random.randperm(4, dtype=ht.float32, split=0)
        c = ht.random.randperm(5, split=0)
        d = ht.random.randperm(5, dtype=ht.float64)

        torch.random.set_rng_state(state)

        # torch results to compare to
        a_cmp = torch.randperm(10, dtype=torch.int32, device=self.device.torch_device)
        b_cmp = torch.randperm(4, dtype=torch.float32, device=self.device.torch_device)
        c_cmp = torch.randperm(5, dtype=torch.int64, device=self.device.torch_device)
        d_cmp = torch.randperm(5, dtype=torch.float64, device=self.device.torch_device)

        self.assertEqual(a.dtype, ht.int32)
        self.assertTrue((a.larray == a_cmp).all())
        self.assertEqual(b.dtype, ht.float32)
        self.assertTrue((ht.resplit(b).larray == b_cmp).all())
        self.assertEqual(c.dtype, ht.int64)
        self.assertTrue((ht.resplit(c).larray == c_cmp).all())
        self.assertEqual(d.dtype, ht.float64)
        self.assertTrue((d.larray == d_cmp).all())

        with self.assertRaises(TypeError):
            ht.random.randperm("abc")
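A minimal standalone sketch of the same pattern, assuming the conventional import heat as ht alias: draw a permutation distributed across processes, then gather it with ht.resplit before inspecting the complete result locally.

    import heat as ht

    # distributed permutation of 0..9, chunked across processes along axis 0
    perm = ht.random.randperm(10, dtype=ht.int32, split=0)

    # gather the chunks so every process holds the complete permutation
    full = ht.resplit(perm, None)
    print(full.larray)  # the local torch tensor now holds all 10 entries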
Example #2
    def fit(self, X, y, sample_weight=None):
        """
        Fit Gaussian Naive Bayes according to X, y

        Parameters
        ----------
        X : ht.tensor of shape (n_samples, n_features)
            Training set, where n_samples is the number of samples
            and n_features is the number of features.
        y : ht.tensor of shape (n_samples,)
            Labels for training set.
        sample_weight : ht.tensor of shape (n_samples,), optional (default=None)
            Weights applied to individual samples (1. for unweighted).

        Returns
        -------
        self : object
        """
        # sanitize input - to be moved to sanitation module, cf. #468
        if not isinstance(X, ht.DNDarray):
            raise ValueError(
                "input needs to be a ht.DNDarray, but was {}".format(type(X)))
        if not isinstance(y, ht.DNDarray):
            raise ValueError(
                "input needs to be a ht.DNDarray, but was {}".format(type(y)))
        if y.numdims != 1:
            raise ValueError("expected y to be a 1-D tensor, is {}-D".format(
                y.numdims))
        if sample_weight is not None:
            if not isinstance(sample_weight, ht.DNDarray):
                raise ValueError(
                    "sample_weight needs to be a ht.DNDarray, but was {}".
                    format(type(sample_weight)))
        classes = ht.unique(y, sorted=True)
        if classes.split is not None:
            classes = ht.resplit(classes, axis=None)

        return self.__partial_fit(X,
                                  y,
                                  classes,
                                  _refit=True,
                                  sample_weight=sample_weight)
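The docstring above fixes the expected shapes; as a usage sketch (assuming the standard import heat as ht alias and the small synthetic data below), fitting looks like:

    import heat as ht
    from heat.naive_bayes import GaussianNB

    # four samples with two features each, two classes; split=0 distributes samples
    X = ht.array([[0.0, 1.0], [0.1, 0.9], [1.0, 0.0], [0.9, 0.1]], split=0)
    y = ht.array([0, 0, 1, 1], split=0)

    clf = GaussianNB()
    clf.fit(X, y)          # fit on the distributed training set
    print(clf.predict(X))  # one predicted label per sample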
Example #3
 def test_misc_coverage(self):
     length = torch.tensor([i + 5 for i in range(3)], device=self.device.torch_device)
     test = torch.arange(
         torch.prod(length), dtype=torch.float64, device=self.device.torch_device
     ).reshape([i + 5 for i in range(3)])
     a = ht.array(test, split=None)
     tiles = ht.tiling.SplitTiles(a)
     self.assertTrue(torch.all(tiles.tile_locations == a.comm.rank))
     a = ht.resplit(a, 0)
     tiles = ht.tiling.SplitTiles(a)
     if a.comm.size == 3:
         # deriving the expected values would use the same logic as the code itself,
         #   therefore, fixed values are asserted for one process configuration
         tile_dims = torch.tensor(
             [[2.0, 2.0, 1.0], [2.0, 2.0, 2.0], [3.0, 2.0, 2.0]], device=self.device.torch_device
         )
         res = tiles.tile_dimensions
         self.assertTrue(torch.equal(tile_dims, res))
         testing_tensor = torch.tensor(
             [
                 [
                     [168.0, 169.0, 170.0, 171.0, 172.0, 173.0, 174.0],
                     [175.0, 176.0, 177.0, 178.0, 179.0, 180.0, 181.0],
                     [182.0, 183.0, 184.0, 185.0, 186.0, 187.0, 188.0],
                     [189.0, 190.0, 191.0, 192.0, 193.0, 194.0, 195.0],
                     [196.0, 197.0, 198.0, 199.0, 200.0, 201.0, 202.0],
                     [203.0, 204.0, 205.0, 206.0, 207.0, 208.0, 209.0],
                 ]
             ],
             dtype=torch.float64,
             device=self.device.torch_device,
         )
         if a.comm.rank == 2:
             self.assertTrue(torch.equal(tiles[2], testing_tensor))
         tiles[2] = 1000
         sl = tiles[2]
         if a.comm.rank == 2:
             self.assertEqual(torch.Size([1, 6, 7]), sl.shape)
             self.assertTrue(torch.all(sl == 1000))
         else:
             self.assertTrue(sl is None)
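As a rough standalone illustration of the tiling accessors exercised above (a sketch, assuming import heat as ht):

    import heat as ht

    a = ht.arange(60, dtype=ht.float64).reshape((3, 4, 5))
    a = ht.resplit(a, 0)          # distribute along axis 0
    tiles = ht.tiling.SplitTiles(a)
    print(tiles.tile_dimensions)  # per-axis tile sizes, as asserted above
    print(tiles.tile_locations)   # which process owns each tile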
Example #4
    def fit(self,
            x: DNDarray,
            y: DNDarray,
            sample_weight: Optional[DNDarray] = None):
        """
        Fit Gaussian Naive Bayes according to ``x`` and ``y``

        Parameters
        ----------
        x : DNDarray
            Training set, where n_samples is the number of samples
            and n_features is the number of features.  Shape = (n_classes, n_features)
        y : DNDarray
            Labels for training set. Shape = (n_samples, )
        sample_weight : DNDarray, optional
            Weights applied to individual samples (1. for unweighted). Shape = (n_samples, )
        """
        # sanitize input - to be moved to sanitation module, cf. #468
        if not isinstance(x, ht.DNDarray):
            raise ValueError(
                "input needs to be a ht.DNDarray, but was {}".format(type(x)))
        if not isinstance(y, ht.DNDarray):
            raise ValueError(
                "input needs to be a ht.DNDarray, but was {}".format(type(y)))
        if y.ndim != 1:
            raise ValueError("expected y to be a 1-D tensor, is {}-D".format(
                y.ndim))
        if sample_weight is not None:
            if not isinstance(sample_weight, ht.DNDarray):
                raise ValueError(
                    "sample_weight needs to be a ht.DNDarray, but was {}".
                    format(type(sample_weight)))
        classes = ht.unique(y, sorted=True)
        if classes.split is not None:
            classes = ht.resplit(classes, axis=None)

        return self.__partial_fit(x,
                                  y,
                                  classes,
                                  _refit=True,
                                  sample_weight=sample_weight)
Example #5
    def test_permutation(self):
        # Reset RNG
        ht.random.seed()
        state = torch.random.get_rng_state()

        # results
        a = ht.random.permutation(10)

        b_arr = ht.arange(10, dtype=ht.float32)
        b = ht.random.permutation(ht.resplit(b_arr, 0))

        c_arr = ht.arange(16).reshape((4, 4))
        c = ht.random.permutation(c_arr)

        c0 = ht.random.permutation(ht.resplit(c_arr, 0))
        c1 = ht.random.permutation(ht.resplit(c_arr, 1))

        torch.set_rng_state(state)

        # torch results to compare to
        a_cmp = torch.randperm(a.shape[0], device=self.device.torch_device)
        b_cmp = b_arr.larray[torch.randperm(b.shape[0],
                                            device=self.device.torch_device)]
        c_cmp = c_arr.larray[torch.randperm(c.shape[0],
                                            device=self.device.torch_device)]
        c0_cmp = c_arr.larray[torch.randperm(c.shape[0],
                                             device=self.device.torch_device)]
        c1_cmp = c_arr.larray[torch.randperm(c.shape[0],
                                             device=self.device.torch_device)]

        # compare
        self.assertEqual(a.dtype, ht.int64)
        self.assertTrue((a.larray == a_cmp).all())
        self.assertEqual(b.dtype, ht.float32)
        self.assertTrue((ht.resplit(b).larray == b_cmp).all())
        self.assertTrue((c.larray == c_cmp).all())
        self.assertTrue((ht.resplit(c0).larray == c0_cmp).all())
        self.assertTrue((ht.resplit(c1).larray == c1_cmp).all())

        with self.assertRaises(TypeError):
            ht.random.permutation("abc")
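A minimal sketch of the row-permutation behavior checked above, assuming import heat as ht: for a 2-D input, ht.random.permutation shuffles along the first axis only, leaving each row intact.

    import heat as ht

    m = ht.arange(16).reshape((4, 4))
    shuffled = ht.random.permutation(ht.resplit(m, 0))  # permute rows of the split matrix
    print(ht.resplit(shuffled, None).larray)            # rows reordered, row contents unchanged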
Example #6
    def test_resplit(self):
        # resplitting with the same axis (None) should leave everything unchanged
        shape = (ht.MPI_WORLD.size, ht.MPI_WORLD.size)
        data = ht.zeros(shape, split=None, device=ht_device)
        data2 = ht.resplit(data, None)

        self.assertIsInstance(data2, ht.DNDarray)
        self.assertEqual(data2.shape, shape)
        self.assertEqual(data2.lshape, shape)
        self.assertEqual(data2.split, None)

        # resplitting with the same axis (1) should leave everything unchanged
        shape = (ht.MPI_WORLD.size, ht.MPI_WORLD.size)
        data = ht.zeros(shape, split=1, device=ht_device)
        data2 = ht.resplit(data, 1)

        self.assertIsInstance(data2, ht.DNDarray)
        self.assertEqual(data2.shape, shape)
        self.assertEqual(data2.lshape, (data.comm.size, 1))
        self.assertEqual(data2.split, 1)

        # splitting an unsplit tensor should result in slicing the tensor locally
        shape = (ht.MPI_WORLD.size, ht.MPI_WORLD.size)
        data = ht.zeros(shape, device=ht_device)
        data2 = ht.resplit(data, 1)

        self.assertIsInstance(data2, ht.DNDarray)
        self.assertEqual(data2.shape, shape)
        self.assertEqual(data2.lshape, (data.comm.size, 1))
        self.assertEqual(data2.split, 1)

        # unsplitting, aka gathering a tensor
        shape = (ht.MPI_WORLD.size + 1, ht.MPI_WORLD.size)
        data = ht.ones(shape, split=0, device=ht_device)
        data2 = ht.resplit(data, None)

        self.assertIsInstance(data2, ht.DNDarray)
        self.assertEqual(data2.shape, shape)
        self.assertEqual(data2.lshape, shape)
        self.assertEqual(data2.split, None)

        # assign an entirely new split axis
        shape = (ht.MPI_WORLD.size + 2, ht.MPI_WORLD.size + 1)
        data = ht.ones(shape, split=0, device=ht_device)
        data2 = ht.resplit(data, 1)

        self.assertIsInstance(data2, ht.DNDarray)
        self.assertEqual(data2.shape, shape)
        self.assertEqual(data2.lshape[0], ht.MPI_WORLD.size + 2)
        self.assertTrue(data2.lshape[1] == 1 or data2.lshape[1] == 2)
        self.assertEqual(data2.split, 1)

        # test sorting order of resplit

        N = ht.MPI_WORLD.size
        reference_tensor = ht.zeros((N, N + 1, 2 * N))
        for n in range(N):
            for m in range(N + 1):
                reference_tensor[n, m, :] = ht.arange(0, 2 * N) + m * 10 + n * 100

        # split along axis = 0
        resplit_tensor = ht.resplit(reference_tensor, axis=0)
        local_shape = (1, N + 1, 2 * N)
        local_tensor = reference_tensor[ht.MPI_WORLD.rank, :, :]
        self.assertEqual(resplit_tensor.lshape, local_shape)
        self.assertTrue((resplit_tensor._DNDarray__array == local_tensor._DNDarray__array).all())

        # unsplit
        unsplit_tensor = ht.resplit(resplit_tensor, axis=None)
        self.assertTrue(
            (unsplit_tensor._DNDarray__array == reference_tensor._DNDarray__array).all()
        )

        # split along axis = 1
        resplit_tensor = ht.resplit(unsplit_tensor, axis=1)
        if ht.MPI_WORLD.rank == 0:
            local_shape = (N, 2, 2 * N)
            local_tensor = reference_tensor[:, 0:2, :]
        else:
            local_shape = (N, 1, 2 * N)
            local_tensor = reference_tensor[:, ht.MPI_WORLD.rank + 1 : ht.MPI_WORLD.rank + 2, :]

        self.assertEqual(resplit_tensor.lshape, local_shape)
        self.assertTrue((resplit_tensor._DNDarray__array == local_tensor._DNDarray__array).all())

        # unsplit
        unsplit_tensor = ht.resplit(resplit_tensor, axis=None)
        self.assertTrue(
            (unsplit_tensor._DNDarray__array == reference_tensor._DNDarray__array).all()
        )

        # split along axis = 2
        resplit_tensor = ht.resplit(unsplit_tensor, axis=2)
        local_shape = (N, N + 1, 2)
        local_tensor = reference_tensor[:, :, 2 * ht.MPI_WORLD.rank : 2 * ht.MPI_WORLD.rank + 2]

        self.assertEqual(resplit_tensor.lshape, local_shape)
        self.assertTrue((resplit_tensor._DNDarray__array == local_tensor._DNDarray__array).all())
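Outside the test harness, the round trip pinned down above reduces to a few lines (a sketch, assuming import heat as ht and execution under mpirun with four processes):

    import heat as ht

    a = ht.zeros((8, 8), split=0)  # rows distributed across processes
    print(a.lshape)                # local chunk, e.g. (2, 8) on 4 processes

    b = ht.resplit(a, 1)           # redistribute: columns are now split
    print(b.lshape)                # e.g. (8, 2) on 4 processes

    c = ht.resplit(b, None)        # gather: every process holds the full array
    print(c.lshape)                # (8, 8) everywhere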
Example #7
    def test_fit_iris(self):
        # load sklearn train/test sets and resulting probabilities
        X_train = ht.load(
            "heat/datasets/data/iris_X_train.csv", sep=";", dtype=ht.float64, device=ht_device
        )
        X_test = ht.load(
            "heat/datasets/data/iris_X_test.csv", sep=";", dtype=ht.float64, device=ht_device
        )
        y_train = ht.load(
            "heat/datasets/data/iris_y_train.csv", sep=";", dtype=ht.int64, device=ht_device
        ).squeeze()
        y_test = ht.load(
            "heat/datasets/data/iris_y_test.csv", sep=";", dtype=ht.int64, device=ht_device
        ).squeeze()
        y_pred_proba_sklearn = ht.load(
            "heat/datasets/data/iris_y_pred_proba.csv", sep=";", dtype=ht.float64, device=ht_device
        )

        # test ht.GaussianNB
        from heat.naive_bayes import GaussianNB

        gnb_heat = GaussianNB()
        self.assertEqual(gnb_heat.priors, None)
        with self.assertRaises(AttributeError):
            gnb_heat.classes_
        with self.assertRaises(AttributeError):
            gnb_heat.class_prior_
        with self.assertRaises(AttributeError):
            gnb_heat.epsilon_

        # test GaussianNB locally, no weights
        local_fit = gnb_heat.fit(X_train, y_train)
        self.assert_array_equal(gnb_heat.classes_, np.array([0, 1, 2]))
        local_fit_no_classes = gnb_heat.partial_fit(X_train, y_train, classes=None)
        y_pred_local = local_fit_no_classes.predict(X_test)
        y_pred_proba_local = local_fit.predict_proba(X_test)
        sklearn_class_prior = np.array([0.38666667, 0.26666667, 0.34666667])
        sklearn_epsilon = np.array([3.6399040000000003e-09])
        sklearn_theta = ht.array(
            [
                [4.97586207, 3.35862069, 1.44827586, 0.23448276],
                [5.935, 2.71, 4.185, 1.3],
                [6.77692308, 3.09230769, 5.73461538, 2.10769231],
            ],
            dtype=X_train.dtype,
            device=ht_device,
        )
        sklearn_sigma = ht.array(
            [
                [0.10321047, 0.13208086, 0.01629013, 0.00846612],
                [0.256275, 0.0829, 0.255275, 0.046],
                [0.38869823, 0.10147929, 0.31303255, 0.04763314],
            ],
            dtype=X_train.dtype,
            device=ht_device,
        )
        self.assertIsInstance(y_pred_local, ht.DNDarray)
        self.assertEqual((y_pred_local != y_test).sum(), ht.array(4))
        self.assert_array_equal(gnb_heat.class_prior_, sklearn_class_prior)
        self.assert_array_equal(gnb_heat.epsilon_, sklearn_epsilon)
        self.assertTrue(ht.isclose(gnb_heat.theta_, sklearn_theta).all())
        self.assertTrue(ht.isclose(gnb_heat.sigma_, sklearn_sigma, atol=1e-1).all())
        self.assertTrue(ht.isclose(y_pred_proba_sklearn, y_pred_proba_local, atol=1e-1).all())

        # test GaussianNB when sample_weight is not None, sample_weight not distributed
        sample_weight = ht.ones((y_train.gshape[0]), dtype=ht.float32, split=None)
        local_fit_weight = gnb_heat.fit(X_train, y_train, sample_weight=sample_weight)
        y_pred_local_weight = local_fit_weight.predict(X_test)
        y_pred_proba_local_weight = local_fit_weight.predict_proba(X_test)
        self.assertIsInstance(y_pred_local_weight, ht.DNDarray)
        self.assert_array_equal(gnb_heat.class_prior_, sklearn_class_prior)
        self.assert_array_equal(gnb_heat.epsilon_, sklearn_epsilon)
        self.assertTrue(ht.isclose(gnb_heat.theta_, sklearn_theta).all())
        self.assertTrue(ht.isclose(gnb_heat.sigma_, sklearn_sigma, atol=1e-1).all())
        self.assert_array_equal(y_pred_local_weight, y_pred_local.numpy())
        self.assertTrue(ht.isclose(y_pred_proba_sklearn, y_pred_proba_local_weight).all())

        # test GaussianNB, data and labels distributed along split axis 0
        X_train_split = ht.resplit(X_train, axis=0)
        X_test_split = ht.resplit(X_test, axis=0)
        y_train_split = ht.resplit(y_train, axis=0)
        y_test_split = ht.resplit(y_test, axis=0)
        y_pred_split = gnb_heat.fit(X_train_split, y_train_split).predict(X_test_split)
        self.assert_array_equal(gnb_heat.class_prior_, sklearn_class_prior)
        self.assert_array_equal(gnb_heat.epsilon_, sklearn_epsilon)
        self.assertTrue(ht.isclose(gnb_heat.theta_, sklearn_theta).all())
        self.assertTrue(ht.isclose(gnb_heat.sigma_, sklearn_sigma, atol=1e-1).all())
        self.assert_array_equal(y_pred_split, y_pred_local.numpy())
        self.assertEqual((y_pred_split != y_test_split).sum(), ht.array(4))
        sample_weight_split = ht.ones(y_train_split.gshape[0], dtype=ht.float32, split=0)
        y_pred_split_weight = gnb_heat.fit(
            X_train_split, y_train_split, sample_weight=sample_weight_split
        ).predict(X_test_split)
        self.assertIsInstance(y_pred_split_weight, ht.DNDarray)
        self.assert_array_equal(y_pred_split_weight, y_pred_split.numpy())

        # test exceptions
        X_torch = torch.ones(75, 4)
        y_np = np.zeros(75)
        y_2D = ht.ones((75, 1), split=None, device=ht_device)
        weights_torch = torch.zeros(75)
        X_3D = ht.ones((75, 4, 4), split=None, device=ht_device)
        X_wrong_size = ht.ones((75, 5), split=None, device=ht_device)
        y_wrong_size = ht.zeros(76, device=ht_device)
        X_train_split = ht.resplit(X_train, axis=0)
        y_train_split = ht.resplit(y_train, axis=0)
        weights_2D_split = ht.ones((75, 1), split=0, device=ht_device)
        weights_wrong_size = ht.ones(76, device=ht_device)
        priors_wrong_shape = ht.random.randn(4, device=ht_device)
        priors_wrong_sum = ht.random.randn(3, dtype=ht.float32, device=ht_device)
        priors_wrong_sign = ht.array([-0.3, 0.7, 0.6])
        wrong_classes = ht.array([3, 4, 5])

        with self.assertRaises(ValueError):
            gnb_heat.fit(X_torch, y_train)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_np)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_2D)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_train, sample_weight=weights_torch)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_3D, y_train)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_wrong_size)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_train)
            gnb_heat.predict(X_torch)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_train)
            gnb_heat.partial_fit(X_wrong_size, y_train)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_train)
            gnb_heat.partial_fit(X_train, y_train, classes=wrong_classes)
        with self.assertRaises(ValueError):
            gnb_heat.classes_ = None
            gnb_heat.partial_fit(X_train, y_train, classes=None)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train_split, y_train_split, sample_weight=weights_2D_split)
        with self.assertRaises(ValueError):
            gnb_heat.fit(X_train, y_train, sample_weight=weights_wrong_size)
        with self.assertRaises(ValueError):
            gnb_heat.priors = priors_wrong_shape
            gnb_heat.fit(X_train, y_train)
        with self.assertRaises(ValueError):
            gnb_heat.priors = priors_wrong_sum
            gnb_heat.fit(X_train, y_train)
        with self.assertRaises(ValueError):
            gnb_heat.priors = priors_wrong_sign
            gnb_heat.fit(X_train, y_train)
Example #8
    def __partial_fit(self,
                      X,
                      y,
                      classes=None,
                      _refit=False,
                      sample_weight=None):
        """
        Actual implementation of Gaussian NB fitting. Adapted to HeAT from scikit-learn.

        Parameters
        ----------
        X : ht.tensor of shape (n_samples, n_features)
            Training set, where n_samples is the number of samples and
            n_features is the number of features.
        y : ht.tensor of shape (n_samples,)
            Labels for training set.
        classes : ht.tensor of shape (n_classes,), optional (default=None)
            List of all the classes that can possibly appear in the y vector.
            Must be provided at the first call to partial_fit, can be omitted
            in subsequent calls.
        _refit : bool, optional (default=False)
            If true, act as though this were the first time __partial_fit is called
            (i.e., throw away any past fitting and start over).
        sample_weight : ht.tensor of shape (n_samples,), optional (default=None)
            Weights applied to individual samples (1. for unweighted).

        Returns
        -------
        self : object
        """

        # TODO: sanitize X and y shape: sanitation/validation module, cf. #468
        n_samples = X.shape[0]
        if X.numdims != 2:
            raise ValueError("expected X to be a 2-D tensor, is {}-D".format(
                X.numdims))
        if y.shape[0] != n_samples:
            raise ValueError(
                "y.shape[0] must match number of samples {}, is {}".format(
                    n_samples, y.shape[0]))

        # TODO: sanitize sample_weight: sanitation/validation module, cf. #468
        if sample_weight is not None:
            if sample_weight.numdims != 1:
                raise ValueError("Sample weights must be 1D tensor")
            if sample_weight.shape != (n_samples, ):
                raise ValueError(
                    "sample_weight.shape == {}, expected {}!".format(
                        sample_weight.shape, (n_samples, )))

        # If the ratio of data variance between dimensions is too small, it
        # will cause numerical errors. To address this, we artificially
        # boost the variance by epsilon, a small fraction of the standard
        # deviation of the largest dimension.
        self.epsilon_ = self.var_smoothing * ht.var(X, axis=0).max()

        if _refit:
            self.classes_ = None

        if self.__check_partial_fit_first_call(classes):
            # This is the first call to partial_fit:
            # initialize various cumulative counters
            n_features = X.shape[1]
            n_classes = len(self.classes_)
            self.theta_ = ht.zeros((n_classes, n_features),
                                   dtype=X.dtype,
                                   device=X.device)
            self.sigma_ = ht.zeros((n_classes, n_features),
                                   dtype=X.dtype,
                                   device=X.device)

            self.class_count_ = ht.zeros((n_classes, ),
                                         dtype=ht.float64,
                                         device=X.device)

            # Initialise the class prior
            # Take into account the priors
            if self.priors is not None:
                if not isinstance(self.priors, ht.DNDarray):
                    priors = ht.array(self.priors,
                                      dtype=X.dtype,
                                      split=None,
                                      device=X.device)
                else:
                    priors = self.priors
                # Check that the provided priors match the number of classes
                if len(priors) != n_classes:
                    raise ValueError("Number of priors must match number of"
                                     " classes.")
                # Check that the sum is 1
                if not ht.isclose(priors.sum(),
                                  ht.array(1.0, dtype=priors.dtype)):
                    raise ValueError("The sum of the priors should be 1.")
                # Check that the priors are non-negative
                if (priors < 0).any():
                    raise ValueError("Priors must be non-negative.")
                self.class_prior_ = priors
            else:
                # Initialize the priors to zeros for each class
                self.class_prior_ = ht.zeros(len(self.classes_),
                                             dtype=ht.float64,
                                             split=None,
                                             device=X.device)
        else:
            if X.shape[1] != self.theta_.shape[1]:
                raise ValueError(
                    "Number of features {} does not match previous data {}.".
                    format(X.shape[1], self.theta_.shape[1]))
            # Put epsilon back in each time
            self.sigma_[:, :] -= self.epsilon_

        classes = self.classes_

        unique_y = ht.unique(y, sorted=True)
        if unique_y.split is not None:
            unique_y = ht.resplit(unique_y, axis=None)
        unique_y_in_classes = ht.eq(unique_y, classes)

        if not ht.all(unique_y_in_classes):
            raise ValueError("The target label(s) {} in y do not exist in the "
                             "initial classes {}".format(
                                 unique_y[~unique_y_in_classes], classes))
        for y_i in unique_y:
            # assuming classes.split is None
            if y_i in classes:
                i = ht.where(classes == y_i).item()
            else:
                classes_ext = torch.cat((classes._DNDarray__array,
                                         y_i._DNDarray__array.unsqueeze(0)))
                i = torch.argsort(classes_ext)[-1].item()
            where_y_i = ht.where(y == y_i)._DNDarray__array.tolist()
            X_i = X[where_y_i, :]

            if sample_weight is not None:
                sw_i = sample_weight[where_y_i]
                if 0 not in sw_i.shape:
                    N_i = sw_i.sum()
                else:
                    N_i = 0.0
                    sw_i = None
            else:
                sw_i = None
                N_i = X_i.shape[0]

            new_theta, new_sigma = self.__update_mean_variance(
                self.class_count_[i], self.theta_[i, :], self.sigma_[i, :],
                X_i, sw_i)

            self.theta_[i, :] = new_theta
            self.sigma_[i, :] = new_sigma
            self.class_count_[i] += N_i

        self.sigma_[:, :] += self.epsilon_

        # Update only if no priors were provided
        if self.priors is None:
            # Empirical prior, with sample_weight taken into account
            self.class_prior_ = self.class_count_ / self.class_count_.sum()

        return self
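The variance-smoothing step at the top of __partial_fit is easy to reproduce in isolation; a sketch, assuming import heat as ht and scikit-learn's default var_smoothing of 1e-9:

    import heat as ht

    X = ht.array([[1.0, 100.0], [2.0, 200.0], [3.0, 300.0]])
    var_smoothing = 1e-9  # scikit-learn's default

    # epsilon is a small fraction of the largest per-feature variance;
    # it is added to every variance to avoid numerical degeneracy
    epsilon = var_smoothing * ht.var(X, axis=0).max()
    print(epsilon)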
Example #9
    def test_data_parallel(self):
        import heat.nn.functional as F

        with self.assertRaises(TypeError):
            ht.utils.data.datatools.DataLoader("asdf")

        class Model(ht.nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                # 1 input image channel, 6 output channels, 3x3 square convolution
                # kernel
                self.conv1 = ht.nn.Conv2d(1, 6, 3)
                self.conv2 = ht.nn.Conv2d(6, 16, 3)
                # an affine operation: y = Wx + b
                self.fc1 = ht.nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
                self.fc2 = ht.nn.Linear(120, 84)
                self.fc3 = ht.nn.Linear(84, 10)

            def forward(self, x):
                # Max pooling over a (2, 2) window
                x = self.conv1(x)
                x = F.max_pool2d(F.relu(x), (2, 2))
                # If the size is a square you can only specify a single number
                x = F.max_pool2d(F.relu(self.conv2(x)), 2)
                x = x.view(-1, self.num_flat_features(x))
                x = F.relu(self.fc1(x))
                x = F.relu(self.fc2(x))
                x = self.fc3(x)
                return x

            def num_flat_features(self, x):
                size = x.size()[1:]  # all dimensions except the batch dimension
                num_features = 1
                for s in size:
                    num_features *= s
                return num_features

        class TestDataset(ht.utils.data.Dataset):
            def __init__(self, array, ishuffle):
                super(TestDataset, self).__init__(array, ishuffle=ishuffle)

            def __getitem__(self, item):
                return self.data[item]

            def Ishuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_ishuffle(self, attrs=[["data", None]])

            def Shuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_shuffle(self, attrs=[["data", None]])

        # create model and move it to GPU with id rank
        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        with self.assertRaises(TypeError):
            ht.optim.DataParallelOptimizer(optimizer, "asdf")
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, True)

        ht.random.seed(1)
        torch.random.manual_seed(1)

        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = TestDataset(data, ishuffle=True)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        # there is only 1 batch on each process (data size[0] is 2 * number of processes, and the batch size is 2)
        self.assertTrue(len(dataloader) == 1)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=True
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)
        lim = 1e-4

        loss_fn = torch.nn.MSELoss()
        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()

            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))

        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, False)
        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = ht.utils.data.Dataset(data, ishuffle=False)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=False
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)

        with self.assertRaises(TypeError):
            ht.nn.DataParallel(model, data.comm, "asdf")

        loss_fn = torch.nn.MSELoss()
        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()
            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))

        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, False)
        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = ht.utils.data.Dataset(data, ishuffle=True)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=False
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)

        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()
            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))
        with self.assertWarns(Warning):
            ht_model = ht.nn.DataParallel(
                model, ht.MPI_WORLD, [dp_optimizer, dp_optimizer], blocking_parameter_updates=False
            )
        # NOTE: this will throw a warning: this is expected
        self.assertTrue(ht_model.blocking_parameter_updates)
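Condensed from the test above, the blocking data-parallel setup chains together as follows (a sketch, assuming the Model class defined earlier and a CPU device):

    import heat as ht
    import torch

    model = Model()  # the small CNN defined above
    optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
    dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, True)

    data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
    dataset = ht.utils.data.Dataset(data, ishuffle=False)
    dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)

    ht_model = ht.nn.DataParallel(model, data.comm, dp_optimizer,
                                  blocking_parameter_updates=True)

    loss_fn = torch.nn.MSELoss()
    labels = torch.randn((2, 10), device=ht.get_device().torch_device)
    for batch in dataloader:
        dp_optimizer.zero_grad()
        loss_fn(ht_model(batch), labels).backward()
        dp_optimizer.step()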