Code Example #1
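Verifies that ht.use_device accepts a device name ("cpu"), a device object (ht.cpu), and None (which keeps the current default), and raises ValueError for unknown names and non-device arguments; assumes import heat as ht.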
    def test_set_default_device_cpu(self):
        ht.use_device("cpu")
        self.assertIs(ht.get_device(), ht.cpu)
        ht.use_device(ht.cpu)
        self.assertIs(ht.get_device(), ht.cpu)
        ht.use_device(None)
        self.assertIs(ht.get_device(), ht.cpu)

        with self.assertRaises(ValueError):
            ht.use_device("fpu")
        with self.assertRaises(ValueError):
            ht.use_device(1)
Code Example #2
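The GPU counterpart of the test above: the GPU assertions run only when ht.torch.cuda.is_available(), while the ValueError checks run unconditionally.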
    def test_set_default_device_gpu(self):
        if ht.torch.cuda.is_available():
            ht.use_device("gpu")
            self.assertIs(ht.get_device(), ht.gpu)
            ht.use_device(ht.gpu)
            self.assertIs(ht.get_device(), ht.gpu)
            ht.use_device(None)
            self.assertIs(ht.get_device(), ht.gpu)

        with self.assertRaises(ValueError):
            ht.use_device("fpu")
        with self.assertRaises(ValueError):
            ht.use_device(1)
Code Example #3
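Exercises ht.asarray with four input kinds: an existing heat array (returned as-is), a distributed Python list combined with is_split, a NumPy array, and a torch tensor; assumes import heat as ht, import numpy as np, and import torch.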
    def test_asarray(self):
        # same heat array
        arr = ht.array([1, 2])
        self.assertTrue(ht.asarray(arr) is arr)

        # from distributed python list
        arr = ht.array([1, 2, 3, 4, 5, 6], split=0)
        lst = arr.tolist(keepsplit=True)
        asarr = ht.asarray(lst, is_split=0)

        self.assertEqual(asarr.shape, arr.shape)
        self.assertEqual(asarr.split, 0)
        self.assertEqual(asarr.device, ht.get_device())
        self.assertTrue(ht.equal(asarr, arr))

        # from numpy array
        arr = np.array([1, 2, 3, 4])
        asarr = ht.asarray(arr)

        self.assertTrue(np.all(np.equal(asarr.numpy(), arr)))

        asarr[0] = 0
        if asarr.device == ht.cpu:
            self.assertEqual(asarr.numpy()[0], arr[0])

        # from torch tensor
        arr = torch.tensor([1, 2, 3, 4], device=self.device.torch_device)
        asarr = ht.asarray(arr)

        self.assertTrue(torch.equal(asarr.larray, arr))

        asarr[0] = 0
        self.assertEqual(asarr.larray[0].item(), arr[0].item())
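
The CPU branch above relies on ht.asarray sharing memory with its input where possible. A minimal sketch of that behavior, assuming heat and NumPy are installed and the default device is CPU:

import heat as ht
import numpy as np

np_arr = np.array([1, 2, 3, 4])
ht_arr = ht.asarray(np_arr)  # on CPU this wraps the buffer instead of copying
np_arr[0] = 99               # a write through the NumPy array...
print(ht_arr.numpy()[0])     # ...is visible through the heat array: prints 99
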
Code Example #4
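A training helper that runs a fixed number of batches against an MSE loss, optionally under torch.cuda.amp.autocast with a gradient scaler, and returns the last loss detached from the graph; assumes import torch and import heat as ht. The same helper appears inside Code Example #11.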
        def train(model, device, optimizer, target, batches=20, scaler=None):
            model.train()
            optimizer.last_batch = batches - 1
            loss_fn = torch.nn.MSELoss()
            torch.random.manual_seed(10)
            data = torch.rand(batches,
                              2,
                              1,
                              32,
                              32,
                              device=ht.get_device().torch_device)
            for b in range(batches):
                d, t = data[b].to(device), target[b].to(device)
                optimizer.zero_grad()
                if scaler is not None:
                    with torch.cuda.amp.autocast():
                        output = model(d)
                        loss = loss_fn(output, t)
                    ret_loss = loss.clone().detach()
                    scaler.scale(loss).backward()
                else:
                    output = model(d)
                    loss = loss_fn(output, t)
                    ret_loss = loss.clone().detach()
                    loss.backward()

                optimizer.step()
            return ret_loss
Code Example #5
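Merges the CPU and GPU default-device tests, choosing the branch via the DEVICE environment variable; assumes import os and import heat as ht.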
    def test_set_default_device(self):
        if os.environ.get("DEVICE") == "gpu":
            ht.use_device("gpu")
            self.assertIs(ht.get_device(), ht.gpu)
            ht.use_device(ht.gpu)
            self.assertIs(ht.get_device(), ht.gpu)
            ht.use_device(None)
            self.assertIs(ht.get_device(), ht.gpu)
        else:
            ht.use_device("cpu")
            self.assertIs(ht.get_device(), ht.cpu)
            ht.use_device(ht.cpu)
            self.assertIs(ht.get_device(), ht.cpu)
            ht.use_device(None)
            self.assertIs(ht.get_device(), ht.cpu)

        with self.assertRaises(ValueError):
            ht.use_device("fpu")
        with self.assertRaises(ValueError):
            ht.use_device(1)
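
Taken together, these tests pin down the device-selection API: ht.use_device takes a name, a Device object, or None, and newly created arrays land on the resulting default. A minimal usage sketch, assuming a CPU-only installation:

import heat as ht

ht.use_device("cpu")   # select the default device by name
ht.use_device(ht.cpu)  # or pass the device object directly
ht.use_device(None)    # None leaves the current default unchanged
x = ht.ones((2, 2))    # new arrays are placed on the default device
print(x.device)        # the CPU device
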
Code Example #6
File: test_logical.py  Project: melven/heat
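Module-level setup from test_logical.py: the DEVICE environment variable (together with CUDA availability) selects the default device before the tests run; test_all then checks that reducing a boolean comparison with .all() yields a scalar-shaped boolean DNDarray.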
import torch
import unittest
import os
import heat as ht

if os.environ.get("DEVICE") == "gpu" and torch.cuda.is_available():
    ht.use_device("gpu")
    torch.cuda.set_device(torch.device(ht.get_device().torch_device))
else:
    ht.use_device("cpu")
device = ht.get_device().torch_device
ht_device = None
if os.environ.get("DEVICE") == "lgpu" and torch.cuda.is_available():
    device = ht.gpu.torch_device
    ht_device = ht.gpu
    torch.cuda.set_device(device)


class TestLogical(unittest.TestCase):
    def test_all(self):
        array_len = 9

        # check all over all float elements of 1d tensor locally
        ones_noaxis = ht.ones(array_len, device=ht_device)
        x = (ones_noaxis == 1).all()

        self.assertIsInstance(x, ht.DNDarray)
        self.assertEqual(x.shape, (1,))
        self.assertEqual(x.lshape, (1,))
        self.assertEqual(x.dtype, ht.bool)
        self.assertEqual(x._DNDarray__array.dtype, torch.bool)
Code Example #7
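An end-to-end test of ht.nn.DataParallel: a small CNN is trained through a distributed DataLoader with blocking and non-blocking parameter updates, and after every epoch the parameters gathered from all processes are compared with torch.allclose to confirm they stayed in sync.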
    def test_data_parallel(self):
        import heat.nn.functional as F

        with self.assertRaises(TypeError):
            ht.utils.data.datatools.DataLoader("asdf")

        class Model(ht.nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                # 1 input image channel, 6 output channels, 3x3 square convolution
                # kernel
                self.conv1 = ht.nn.Conv2d(1, 6, 3)
                self.conv2 = ht.nn.Conv2d(6, 16, 3)
                # an affine operation: y = Wx + b
                self.fc1 = ht.nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
                self.fc2 = ht.nn.Linear(120, 84)
                self.fc3 = ht.nn.Linear(84, 10)

            def forward(self, x):
                # Max pooling over a (2, 2) window
                x = self.conv1(x)
                x = F.max_pool2d(F.relu(x), (2, 2))
                # If the size is a square you can only specify a single number
                x = F.max_pool2d(F.relu(self.conv2(x)), 2)
                x = x.view(-1, self.num_flat_features(x))
                x = F.relu(self.fc1(x))
                x = F.relu(self.fc2(x))
                x = self.fc3(x)
                return x

            def num_flat_features(self, x):
                size = x.size()[1:]  # all dimensions except the batch dimension
                num_features = 1
                for s in size:
                    num_features *= s
                return num_features

        class TestDataset(ht.utils.data.Dataset):
            def __init__(self, array, ishuffle):
                super(TestDataset, self).__init__(array, ishuffle=ishuffle)

            def __getitem__(self, item):
                return self.data[item]

            def Ishuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_ishuffle(self, attrs=[["data", None]])

            def Shuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_shuffle(self, attrs=[["data", None]])

        # create model and move it to GPU with id rank
        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        with self.assertRaises(TypeError):
            ht.optim.DataParallelOptimizer(optimizer, "asdf")
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, True)

        ht.random.seed(1)
        torch.random.manual_seed(1)

        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = TestDataset(data, ishuffle=True)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        # there is only 1 batch on each process (data size[0] is 2 * number of processes, and the batch size is 2)
        self.assertTrue(len(dataloader) == 1)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=True
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)
        lim = 1e-4

        loss_fn = torch.nn.MSELoss()
        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()

            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))

        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, False)
        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = ht.utils.data.Dataset(data, ishuffle=False)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=False
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)

        with self.assertRaises(TypeError):
            ht.nn.DataParallel(model, data.comm, "asdf")

        loss_fn = torch.nn.MSELoss()
        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()
            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))

        model = Model()
        optimizer = ht.optim.SGD(model.parameters(), lr=0.001)
        dp_optimizer = ht.optim.DataParallelOptimizer(optimizer, False)
        labels = torch.randn((2, 10), device=ht.get_device().torch_device)
        data = ht.random.rand(2 * ht.MPI_WORLD.size, 1, 32, 32, split=0)
        dataset = ht.utils.data.Dataset(data, ishuffle=True)
        dataloader = ht.utils.data.datatools.DataLoader(dataset=dataset, batch_size=2)
        ht_model = ht.nn.DataParallel(
            model, data.comm, dp_optimizer, blocking_parameter_updates=False
        )
        if str(ht.get_device())[:3] == "gpu":
            ht_model.to(ht.get_device().torch_device)

        for _ in range(2):
            for data in dataloader:
                self.assertEqual(data.shape[0], 2)
                dp_optimizer.zero_grad()
                ht_outputs = ht_model(data)
                loss_fn(ht_outputs, labels).backward()
                dp_optimizer.step()
            for p in ht_model.parameters():
                p0dim = p.shape[0]
                hld = ht.resplit(ht.array(p, is_split=0))._DNDarray__array
                hld_list = [hld[i * p0dim : (i + 1) * p0dim] for i in range(ht.MPI_WORLD.size - 1)]
                for i in range(1, len(hld_list)):
                    self.assertTrue(torch.allclose(hld_list[0], hld_list[i], rtol=lim, atol=lim))
        # NOTE: passing multiple optimizers emits a warning and forces blocking
        # parameter updates; this is expected
        with self.assertWarns(Warning):
            ht_model = ht.nn.DataParallel(
                model, ht.MPI_WORLD, [dp_optimizer, dp_optimizer], blocking_parameter_updates=False
            )
        self.assertTrue(ht_model.blocking_parameter_updates)
Code Example #8
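Checks that the default device is ht.gpu whenever CUDA is available.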
 def test_get_default_device_gpu(self):
     if ht.torch.cuda.is_available():
         self.assertIs(ht.get_device(), ht.gpu)
Code Example #9
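Checks that the default device is ht.cpu on a CPU-only installation.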
 def test_get_default_device_cpu(self):
     self.assertIs(ht.get_device(), ht.cpu)
Code Example #10
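Checks the default device against the DEVICE environment variable; assumes import os and import heat as ht.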
 def test_get_default_device(self):
     if os.environ.get("DEVICE") == "gpu":
         ht.use_device(os.environ.get("DEVICE"))
         self.assertIs(ht.get_device(), ht.gpu)
     else:
         self.assertIs(ht.get_device(), ht.cpu)
Code Example #11
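A test of the DASO optimizer (distributed asynchronous and selective optimization): argument validation runs on a single CPU process, and the training portion runs only with 8 MPI processes backed by GPUs, asserting in both runs (with and without a torch.cuda.amp.GradScaler) that the loss decreases.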
    def test_daso(self):
        import heat.nn.functional as F
        import heat.optim as optim

        class Model(ht.nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.conv1 = ht.nn.Conv2d(1, 6, 3)
                self.conv2 = ht.nn.Conv2d(6, 16, 3)
                self.fc1 = ht.nn.Linear(16 * 6 * 6, 120)
                self.fc2 = ht.nn.Linear(120, 84)
                self.fc3 = ht.nn.Linear(84, 10)

            def forward(self, x):
                x = self.conv1(x)
                x = F.max_pool2d(F.relu(x), (2, 2))
                x = F.max_pool2d(F.relu(self.conv2(x)), 2)
                x = x.view(-1, self.num_flat_features(x))
                x = F.relu(self.fc1(x))
                x = F.relu(self.fc2(x))
                x = self.fc3(x)
                return x

            @staticmethod
            def num_flat_features(x):
                size = x.size()[
                    1:]  # all dimensions except the batch dimension
                num_features = 1
                for s in size:
                    num_features *= s
                return num_features

        class TestDataset(ht.utils.data.Dataset):
            def __init__(self, array, ishuffle):
                super(TestDataset, self).__init__(array, ishuffle=ishuffle)

            def __getitem__(self, item):
                return self.data[item]

            def Ishuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_ishuffle(self,
                                                   attrs=[["data", None]])

            def Shuffle(self):
                if not self.test_set:
                    ht.utils.data.dataset_shuffle(self, attrs=[["data", None]])

        def train(model, device, optimizer, target, batches=20, scaler=None):
            model.train()
            optimizer.last_batch = batches - 1
            loss_fn = torch.nn.MSELoss()
            torch.random.manual_seed(10)
            data = torch.rand(batches,
                              2,
                              1,
                              32,
                              32,
                              device=ht.get_device().torch_device)
            for b in range(batches):
                d, t = data[b].to(device), target[b].to(device)
                optimizer.zero_grad()
                if scaler is not None:
                    with torch.cuda.amp.autocast():
                        output = model(d)
                        loss = loss_fn(output, t)
                    ret_loss = loss.clone().detach()
                    scaler.scale(loss).backward()
                else:
                    output = model(d)
                    loss = loss_fn(output, t)
                    ret_loss = loss.clone().detach()
                    loss.backward()

                optimizer.step()
            return ret_loss

        model = Model()
        optimizer = optim.SGD(model.parameters(), lr=0.1)
        envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu")
        if ht.MPI_WORLD.size == 1 and envar == "cpu":
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer="asdf", total_epochs=1)
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer, total_epochs="aa")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              warmup_epochs="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              cooldown_epochs="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              scheduler="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              stability_level="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              max_global_skips="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              sending_chunk_size="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              verbose="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              use_mpi_groups="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              downcast_type="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              comm="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              local_skip_factor="asdf")
            with self.assertRaises(TypeError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              skip_reduction_factor="asdf")
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              downcast_type=torch.bool)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              warmup_epochs=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              cooldown_epochs=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              max_global_skips=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              sending_chunk_size=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer, total_epochs=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              local_skip_factor=-1)
            with self.assertRaises(ValueError):
                ht.optim.DASO(local_optimizer=optimizer,
                              total_epochs=1,
                              skip_reduction_factor=-1)
        if ht.MPI_WORLD.size != 8 or torch.cuda.device_count() == 0:
            # the remaining tests require 8 MPI processes with GPU support
            # (e.g. 2 nodes with 4 GPUs each)
            return

        # Training settings
        torch.manual_seed(1)

        gpus = torch.cuda.device_count()
        loc_rank = ht.MPI_WORLD.rank % gpus
        device = "cuda:" + str(loc_rank)
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "29500"
        os.environ["NCCL_SOCKET_IFNAME"] = "ib"
        if not torch.distributed.is_initialized():
            torch.distributed.init_process_group(backend="nccl",
                                                 rank=loc_rank,
                                                 world_size=gpus)
        torch.cuda.set_device(device)
        device = torch.device("cuda")

        model = Model().to(device)
        optimizer = optim.SGD(model.parameters(), lr=0.1)

        epochs = 20

        daso_optimizer = ht.optim.DASO(
            local_optimizer=optimizer,
            total_epochs=epochs,
            max_global_skips=8,
            stability_level=0.9999,
            warmup_epochs=1,
            cooldown_epochs=1,
            verbose=True,
        )
        dp_model = ht.nn.DataParallelMultiGPU(model, daso_optimizer)

        target = torch.rand((20, 2, 10), device=ht.get_device().torch_device)
        for epoch in range(epochs):
            ls = train(dp_model, device, daso_optimizer, target, batches=20)
            if epoch == 0:
                first_ls = ls
            daso_optimizer.epoch_loss_logic(ls)
        # test that the loss decreases
        self.assertTrue(ls < first_ls)
        # test if the smaller split value also works

        daso_optimizer.reset()
        epochs = 4
        daso_optimizer = ht.optim.DASO(
            local_optimizer=optimizer,
            total_epochs=epochs,
            max_global_skips=8,
            stability_level=0.9999,
            warmup_epochs=2,
            cooldown_epochs=1,
            use_mpi_groups=False,
            verbose=False,
            downcast_type=torch.half,
            sending_chunk_size=61194,
        )
        dp_model = ht.nn.DataParallelMultiGPU(model, daso_optimizer)
        scaler = torch.cuda.amp.GradScaler()
        daso_optimizer.add_scaler(scaler)
        for epoch in range(epochs):
            ls = train(dp_model,
                       device,
                       daso_optimizer,
                       target,
                       batches=20,
                       scaler=scaler)
            if epoch == 0:
                first_ls = ls
            daso_optimizer.epoch_loss_logic(ls, loss_globally_averaged=True)
        # test that the loss decreases
        self.assertTrue(ls < first_ls)
        with self.assertRaises(ValueError):
            daso_optimizer._prev_params = [1, 2]
            daso_optimizer._gs_rcv_update_params_last_batch(
                current_ranks=[0, 4])
        with self.assertRaises(ValueError):
            daso_optimizer.last_batch = None
            daso_optimizer.step()