Example #1
0
class TestGPU(object):
    """Tests for the cross-device primitives of the MGPU backend.

    Covers reduce, zero-filled allocation, and the fragment <-> replica
    conversions.  Tests that need more than one device are reported as
    skipped (not passed) when the backend was built with a single device.
    """

    def setup(self):
        """Build the backend under test; called prior to each test.

        Two devices are requested first.  If that construction raises
        ``AssertionError`` the backend is rebuilt with one device.
        """
        from neon.backends.mgpu import MGPU, MGPUTensor
        try:
            self.be = MGPU(rng_seed=0, num_dev=2)
        except AssertionError:
            # likely that only one GPU device is available
            self.be = MGPU(rng_seed=0, num_dev=1)
        self.gpt = MGPUTensor

    def _skip_unless_multi_device(self):
        """Return ``self.be.num_dev``, skipping the test if it equals 1.

        Raises:
            unittest.SkipTest: when the backend has a single device, so
                the runner reports "skipped" instead of a pass for a
                check that never ran.
        """
        # Imported locally so the helper does not depend on file-level
        # imports that may not be present.
        from unittest import SkipTest

        num_dev = self.be.num_dev
        if num_dev == 1:  # This shouldn't be supported
            raise SkipTest("requires more than one GPU device")
        return num_dev

    def reduction_test(self):
        """Check that reduce leaves the summed row on every device.

        A host array with one row per device is scattered into ``d_a``
        and reduced; each entry of ``d_a.tlist`` must then match the
        host-side sum over rows.
        """
        nr = self._skip_unless_multi_device()
        # Seed like frag2rep_test does so a failure can be reproduced.
        np.random.seed(0)
        # create a numpy array as the test-bed
        asize = 9
        # round up to the nearest multiple of num_dev
        bsize = -(-asize // nr) * nr
        h_a = np.random.randn(asize * nr).reshape(
            (nr, asize)).astype(self.be.default_dtype)
        h_result = np.sum(h_a, axis=0, keepdims=True)

        d_a = self.be.empty((1, asize))
        # NOTE(review): u_a looks like a padded scratch buffer for
        # reduce -- confirm against MGPU.reduce.
        u_a = self.be.empty((1, bsize))
        self.be.scatter(h_a, d_a)
        self.be.reduce(d_a, u_a)
        print(h_result)
        print(d_a.tlist[0].asnumpyarray())

        for i in range(nr):
            np.testing.assert_allclose(d_a.tlist[i].asnumpyarray(),
                                       h_result,
                                       atol=1e-6,
                                       rtol=0)

    def memset_test(self):
        """Check that ``be.zeros`` yields all-zero data on every device."""
        # create a numpy array as the test-bed
        asize = 9

        h_result = np.zeros((1, asize))
        d_a = self.be.zeros((1, asize))

        for i in range(self.be.num_dev):
            np.testing.assert_allclose(d_a.tlist[i].asnumpyarray(),
                                       h_result,
                                       atol=1e-6,
                                       rtol=0)

    def frag2rep_test(self):
        """Check fragment_to_replica and replica_to_fragment round trip.

        Host data is scattered as fragments, expanded to replicas, and
        converted back to fragments; the fragment tensor is compared with
        every device's replica after each conversion.
        """
        nr = self._skip_unless_multi_device()
        np.random.seed(0)
        # create a numpy array as the test-bed
        (rows, cols) = (24, 128)
        indim = rows * cols
        odim = indim * nr

        # h_frags has the data in the order we expect on the device
        h_frags_t = np.random.randn(odim).reshape(
            (nr * cols, rows)).astype(self.be.default_dtype)
        h_frags = h_frags_t.transpose().astype(self.be.default_dtype,
                                               order='C')

        d_frags = self.be.empty((rows, cols))
        d_frags_t = self.be.empty((cols, rows))

        d_reps = self.be.empty((rows, cols * nr))
        d_reps_t = self.be.empty((cols * nr, rows))

        self.be.scatter(h_frags_t, d_frags_t)
        self.be.transpose(d_frags_t, d_frags)

        # The scattered-then-transposed fragments must match the host copy.
        np.testing.assert_allclose(d_frags.asnumpyarray(),
                                   h_frags,
                                   atol=1e-5,
                                   rtol=0)

        self.be.fragment_to_replica(d_frags_t, d_reps_t)
        self.be.transpose(d_reps_t, d_reps)

        for i in range(nr):
            np.testing.assert_allclose(d_frags.asnumpyarray(),
                                       d_reps.tlist[i].asnumpyarray(),
                                       atol=1e-5,
                                       rtol=0)
        print("Frag2Rep OK")

        # Clear the fragments so the reverse conversion has to rewrite them.
        d_frags_t.fill(0)
        self.be.replica_to_fragment(d_reps_t, d_frags_t)
        self.be.transpose(d_frags_t, d_frags)
        for i in range(nr):
            np.testing.assert_allclose(d_frags.asnumpyarray(),
                                       d_reps.tlist[i].asnumpyarray(),
                                       atol=1e-5,
                                       rtol=0)
        print("Rep2Frag OK")
Example #2
0
class TestGPU(object):
    """Exercise the MGPU backend: reduce, zeros, and fragment/replica ops.

    Multi-device-only tests raise ``unittest.SkipTest`` on a one-device
    backend so they show up as skipped instead of silently passing.
    """

    def setup(self):
        """Create ``self.be`` before each test.

        Requests two devices and falls back to a single device when the
        first construction raises ``AssertionError``.
        """
        from neon.backends.mgpu import MGPU, MGPUTensor
        # this code gets called prior to each test
        try:
            self.be = MGPU(rng_seed=0, num_dev=2)
        except AssertionError:
            # likely that only one GPU device is available
            self.be = MGPU(rng_seed=0, num_dev=1)
        self.gpt = MGPUTensor

    def _skip_unless_multi_device(self):
        """Give back the backend's device count, or skip on one device.

        Raises:
            unittest.SkipTest: if ``self.be.num_dev`` is 1.
        """
        # Local import keeps this helper independent of file-level imports.
        from unittest import SkipTest

        device_count = self.be.num_dev
        if device_count == 1:  # This shouldn't be supported
            raise SkipTest("requires more than one GPU device")
        return device_count

    def reduction_test(self):
        """Verify reduce: every device ends up with the sum over rows.

        The host array has one row per device.  After scatter + reduce,
        each ``d_a.tlist`` entry is compared with ``np.sum`` over axis 0.
        """
        nr = self._skip_unless_multi_device()
        # Fixed seed, matching frag2rep_test, so failures are reproducible.
        np.random.seed(0)
        # create a numpy array as the test-bed
        asize = 9
        # round up to the nearest multiple of num_dev
        bsize = -(-asize // nr) * nr
        h_a = np.random.randn(asize * nr).reshape(
            (nr, asize)).astype(self.be.default_dtype)
        h_result = np.sum(h_a, axis=0, keepdims=True)

        d_a = self.be.empty((1, asize))
        # NOTE(review): u_a is presumably reduce's padded work buffer --
        # verify against the backend implementation.
        u_a = self.be.empty((1, bsize))
        self.be.scatter(h_a, d_a)
        self.be.reduce(d_a, u_a)
        print(h_result)
        print(d_a.tlist[0].asnumpyarray())

        for i in range(nr):
            np.testing.assert_allclose(d_a.tlist[i].asnumpyarray(),
                                       h_result, atol=1e-6, rtol=0)

    def memset_test(self):
        """Verify that a tensor from ``be.zeros`` is zero on each device."""
        # create a numpy array as the test-bed
        asize = 9

        h_result = np.zeros((1, asize))
        d_a = self.be.zeros((1, asize))

        for i in range(self.be.num_dev):
            np.testing.assert_allclose(d_a.tlist[i].asnumpyarray(),
                                       h_result, atol=1e-6, rtol=0)

    def frag2rep_test(self):
        """Verify fragment -> replica -> fragment conversion.

        After each conversion the fragment tensor is compared against the
        replica held by every device.
        """
        nr = self._skip_unless_multi_device()
        np.random.seed(0)
        # create a numpy array as the test-bed
        (rows, cols) = (24, 128)
        indim = rows * cols
        odim = indim * nr

        # h_frags has the data in the order we expect on the device
        h_frags_t = np.random.randn(odim).reshape(
            (nr * cols, rows)).astype(self.be.default_dtype)
        h_frags = h_frags_t.transpose().astype(
            self.be.default_dtype, order='C')

        d_frags = self.be.empty((rows, cols))
        d_frags_t = self.be.empty((cols, rows))

        d_reps = self.be.empty((rows, cols * nr))
        d_reps_t = self.be.empty((cols * nr, rows))

        self.be.scatter(h_frags_t, d_frags_t)
        self.be.transpose(d_frags_t, d_frags)

        # Sanity check: device fragments agree with the host reference.
        np.testing.assert_allclose(d_frags.asnumpyarray(),
                                   h_frags, atol=1e-5, rtol=0)

        self.be.fragment_to_replica(d_frags_t, d_reps_t)
        self.be.transpose(d_reps_t, d_reps)

        for i in range(nr):
            np.testing.assert_allclose(d_frags.asnumpyarray(),
                                       d_reps.tlist[i].asnumpyarray(),
                                       atol=1e-5, rtol=0)
        print("Frag2Rep OK")

        # Zero the fragments first so stale data cannot satisfy the check.
        d_frags_t.fill(0)
        self.be.replica_to_fragment(d_reps_t, d_frags_t)
        self.be.transpose(d_frags_t, d_frags)
        for i in range(nr):
            np.testing.assert_allclose(d_frags.asnumpyarray(),
                                       d_reps.tlist[i].asnumpyarray(),
                                       atol=1e-5, rtol=0)
        print("Rep2Frag OK")