Example #1
0
 def setup(self):
     """
     Per-test fixture: build an MGPU backend and remember the tensor class.

     This code gets called prior to each test.  A two-device backend is
     attempted first; if its construction raises AssertionError (likely
     because only one GPU device is available) a single-device backend is
     built instead.
     """
     from neon.backends.mgpu import MGPU, MGPUTensor
     try:
         backend = MGPU(rng_seed=0, num_dev=2)
     except AssertionError:
         # likely that only one GPU device is available
         backend = MGPU(rng_seed=0, num_dev=1)
     self.be = backend
     self.gpt = MGPUTensor
Example #2
0
 def setup(self):
     """
     Prepare ``self.be`` (an MGPU backend) and ``self.gpt`` (the MGPUTensor
     class) before each test runs.

     Two devices are requested first; an AssertionError from the
     constructor is taken to mean that only one GPU device is available,
     in which case the request is repeated for a single device.
     """
     from neon.backends.mgpu import MGPU, MGPUTensor
     backend_args = {'rng_seed': 0, 'num_dev': 2}
     try:
         self.be = MGPU(**backend_args)
     except AssertionError:
         # likely that only one GPU device is available
         backend_args['num_dev'] = 1
         self.be = MGPU(**backend_args)
     self.gpt = MGPUTensor
Example #3
0
class TestGPU(object):
    """
    Checks of the MGPU backend's multi-device operations: reduce,
    zero-filled allocation, and the fragment <-> replica conversions.
    """

    @staticmethod
    def _check_close(actual, desired, tol):
        # Compare using an absolute tolerance only (relative tolerance off).
        np.testing.assert_allclose(actual, desired, atol=tol, rtol=0)

    def setup(self):
        """
        Build the backend fixture; this code gets called prior to each test.

        Tries a two-device MGPU backend and falls back to a single device
        when the constructor raises AssertionError.
        """
        from neon.backends.mgpu import MGPU, MGPUTensor
        try:
            backend = MGPU(rng_seed=0, num_dev=2)
        except AssertionError:
            # likely that only one GPU device is available
            backend = MGPU(rng_seed=0, num_dev=1)
        self.be = backend
        self.gpt = MGPUTensor

    def reduction_test(self):
        """
        Scatter one random row per device, reduce, and expect every device
        to hold the column-wise sum of all the rows.

        Returns without checking anything on a single-device backend.
        """
        num_dev = self.be.num_dev
        if num_dev == 1:  # This shouldn't be supported
            return
        # host-side test-bed: one row of `width` values per device
        width = 9
        # round up to the nearest multiple of num_dev
        padded_width = -(-width // num_dev) * num_dev
        host_rows = np.random.randn(width * num_dev)
        host_rows = host_rows.reshape((num_dev, width))
        host_rows = host_rows.astype(self.be.default_dtype)
        expected = np.sum(host_rows, axis=0, keepdims=True)

        dev_rows = self.be.empty((1, width))
        # second argument handed to reduce(); presumably scratch space
        dev_scratch = self.be.empty((1, padded_width))
        self.be.scatter(host_rows, dev_rows)
        self.be.reduce(dev_rows, dev_scratch)
        print(expected)
        print(dev_rows.tlist[0].asnumpyarray())

        for dev in range(num_dev):
            self._check_close(dev_rows.tlist[dev].asnumpyarray(),
                              expected, 1e-6)

    def memset_test(self):
        """
        Expect ``be.zeros`` to yield an all-zero tensor on every device.
        """
        width = 9

        expected = np.zeros((1, width))
        dev_zeros = self.be.zeros((1, width))

        for dev in range(self.be.num_dev):
            self._check_close(dev_zeros.tlist[dev].asnumpyarray(),
                              expected, 1e-6)

    def frag2rep_test(self):
        """
        Round-trip data through fragment_to_replica and replica_to_fragment.

        After each conversion the transposed fragments are compared with
        the transposed replica held by every device.  Returns without
        checking anything on a single-device backend.
        """
        num_dev = self.be.num_dev
        if num_dev == 1:  # This shouldn't be supported
            return
        np.random.seed(0)
        # host-side test-bed dimensions
        rows, cols = 24, 128
        frag_elems = rows * cols
        total_elems = frag_elems * num_dev

        # host_frags has the data in the order we expect on the device
        host_frags_t = np.random.randn(total_elems)
        host_frags_t = host_frags_t.reshape((num_dev * cols, rows))
        host_frags_t = host_frags_t.astype(self.be.default_dtype)
        host_frags = host_frags_t.transpose().astype(self.be.default_dtype,
                                                     order='C')

        dev_frags = self.be.empty((rows, cols))
        dev_frags_t = self.be.empty((cols, rows))

        dev_reps = self.be.empty((rows, cols * num_dev))
        dev_reps_t = self.be.empty((cols * num_dev, rows))

        self.be.scatter(host_frags_t, dev_frags_t)
        self.be.transpose(dev_frags_t, dev_frags)

        self._check_close(dev_frags.asnumpyarray(), host_frags, 1e-5)

        self.be.fragment_to_replica(dev_frags_t, dev_reps_t)
        self.be.transpose(dev_reps_t, dev_reps)

        for dev in range(num_dev):
            self._check_close(dev_frags.asnumpyarray(),
                              dev_reps.tlist[dev].asnumpyarray(), 1e-5)
        print("Frag2Rep OK")

        # wipe the fragments so the reverse conversion has to refill them
        dev_frags_t.fill(0)
        self.be.replica_to_fragment(dev_reps_t, dev_frags_t)
        self.be.transpose(dev_frags_t, dev_frags)
        for dev in range(num_dev):
            self._check_close(dev_frags.asnumpyarray(),
                              dev_reps.tlist[dev].asnumpyarray(), 1e-5)
        print("Rep2Frag OK")
Example #4
0
class TestGPU(object):
    """
    Tests for the MGPU backend covering reduce, zeros, and the conversions
    between per-device fragments and full replicas.
    """

    def setup(self):
        """
        Create ``self.be`` and ``self.gpt``; this code gets called prior to
        each test.

        A two-device backend is requested first, then a one-device backend
        if the constructor raises AssertionError.
        """
        from neon.backends.mgpu import MGPU, MGPUTensor
        backend_args = {'rng_seed': 0, 'num_dev': 2}
        try:
            self.be = MGPU(**backend_args)
        except AssertionError:
            # likely that only one GPU device is available
            backend_args['num_dev'] = 1
            self.be = MGPU(**backend_args)
        self.gpt = MGPUTensor

    def reduction_test(self):
        """
        Verify that reduce leaves the sum over all devices' rows on each
        device.  Nothing is checked when only one device is in use.
        """
        n_dev = self.be.num_dev
        if n_dev == 1:  # This shouldn't be supported
            return
        tolerance = {'atol': 1e-6, 'rtol': 0}
        # create a numpy array as the test-bed
        n_cols = 9
        # round up to the nearest multiple of num_dev
        n_cols_padded = -(-n_cols // n_dev) * n_dev
        flat = np.random.randn(n_cols * n_dev)
        host_in = flat.reshape((n_dev, n_cols)).astype(self.be.default_dtype)
        host_sum = np.sum(host_in, axis=0, keepdims=True)

        dev_in = self.be.empty((1, n_cols))
        # second operand of reduce(); presumably a work buffer
        dev_work = self.be.empty((1, n_cols_padded))
        self.be.scatter(host_in, dev_in)
        self.be.reduce(dev_in, dev_work)
        print(host_sum)
        print(dev_in.tlist[0].asnumpyarray())

        for idx in range(n_dev):
            on_device = dev_in.tlist[idx].asnumpyarray()
            np.testing.assert_allclose(on_device, host_sum, **tolerance)

    def memset_test(self):
        """
        Verify that a tensor from ``be.zeros`` reads back as zeros on every
        device.
        """
        tolerance = {'atol': 1e-6, 'rtol': 0}
        # create a numpy array as the test-bed
        n_cols = 9

        host_zeros = np.zeros((1, n_cols))
        dev_zeros = self.be.zeros((1, n_cols))

        for idx in range(self.be.num_dev):
            on_device = dev_zeros.tlist[idx].asnumpyarray()
            np.testing.assert_allclose(on_device, host_zeros, **tolerance)

    def frag2rep_test(self):
        """
        Verify fragment_to_replica followed by replica_to_fragment.

        After each step the transposed fragment tensor must match the
        transposed replica on every device.  Nothing is checked when only
        one device is in use.
        """
        n_dev = self.be.num_dev
        if n_dev == 1:  # This shouldn't be supported
            return
        np.random.seed(0)
        tolerance = {'atol': 1e-5, 'rtol': 0}
        # create a numpy array as the test-bed
        rows = 24
        cols = 128
        per_dev = rows * cols
        overall = per_dev * n_dev

        # h_frags has the data in the order we expect on the device
        flat = np.random.randn(overall)
        h_frags_t = flat.reshape(
            (n_dev * cols, rows)).astype(self.be.default_dtype)
        h_frags = h_frags_t.transpose().astype(
            self.be.default_dtype, order='C')

        frags = self.be.empty((rows, cols))
        frags_t = self.be.empty((cols, rows))

        reps = self.be.empty((rows, cols * n_dev))
        reps_t = self.be.empty((cols * n_dev, rows))

        self.be.scatter(h_frags_t, frags_t)
        self.be.transpose(frags_t, frags)

        np.testing.assert_allclose(frags.asnumpyarray(), h_frags,
                                   **tolerance)

        self.be.fragment_to_replica(frags_t, reps_t)
        self.be.transpose(reps_t, reps)

        for idx in range(n_dev):
            gathered = frags.asnumpyarray()
            replica = reps.tlist[idx].asnumpyarray()
            np.testing.assert_allclose(gathered, replica, **tolerance)
        print("Frag2Rep OK")

        # zero the fragments so the reverse conversion must repopulate them
        frags_t.fill(0)
        self.be.replica_to_fragment(reps_t, frags_t)
        self.be.transpose(frags_t, frags)
        for idx in range(n_dev):
            gathered = frags.asnumpyarray()
            replica = reps.tlist[idx].asnumpyarray()
            np.testing.assert_allclose(gathered, replica, **tolerance)
        print("Rep2Frag OK")
Example #5
0
def gen_backend(model=None,
                gpu=None,
                nrv=False,
                flexpoint=False,
                rng_seed=None,
                numerr_handling=None,
                half=False,
                stochastic_round=0,
                device_id=None):
    """
    Construct and return a backend instance of the appropriate type based on
    the arguments given.  With no parameters, a CPU backend is returned.

    Arguments:
        model (neon.models.model.Model): The instantiated model upon which we
                                         will utilize this backend.
                                         Currently not used by this function.
        gpu (string, optional): Attempt to utilize a CUDA capable GPU if
                                installed in the system. Defaults to None which
                                implies a CPU based backend.  If 'cudanet',
                                utilize a cuda-convnet2 based backend, which
                                supports Kepler and Maxwell GPUs with single
                                precision. If 'nervanagpu', attempt to utilize
                                the NervanaGPU Maxwell backend with float16 and
                                float32 support.  If 'nervanagpu<N>' (for
                                example 'nervanagpu2'), construct the
                                multi-GPU backend on N devices.  The value
                                is matched case-insensitively.
        nrv (bool, optional): If True, attempt to utilize the Nervana Engine
                              for computation (must be installed on the
                              system).  Defaults to False which implies a CPU
                              based backend.  Only consulted when gpu is
                              None; if the engine software cannot be
                              imported a warning is logged and a CPU backend
                              is returned instead.
        flexpoint (bool, optional): If True, a warning is logged that the
                                    Flexpoint backend is not currently
                                    available; it has no other effect.
        rng_seed (numeric, optional): Set this to a numeric value which can be
                                      used to seed the random number generator
                                      of the instantiated backend.  Defaults to
                                      None, which doesn't explicitly seed (so
                                      each run will be different)
        numerr_handling (dict, optional): Dictate how numeric errors are
                                          displayed and handled.  The keys and
                                          values permissible for this dict
                                          match that seen in numpy.seterr.
                                          If set to None (the default),
                                          behavior is equivalent to
                                          {'all': 'warn'}.  Only forwarded
                                          to the CPU and Nervana Engine
                                          backends.
        half (bool, optional): Currently not used by this function.
        stochastic_round (numeric, optional): Only forwarded to the
                                              nervanagpu based backends. If
                                              1, perform stochastic rounding.
                                              If 0, round to nearest.
        device_id (numeric or sequence, optional): Selects which device(s) to
                                       run on.  For 'nervanagpu' a bare
                                       number or the first element of a
                                       sequence is used (default 0).  For
                                       'nervanagpu<N>' a sequence of N ids
                                       is expected (default range(N)).  For
                                       'cudanet' and the Nervana Engine the
                                       value is forwarded unchanged.

    Returns:
        Backend: newly constructed backend instance of the specified type.

    Raises:
        RuntimeError: if a GPU backend was requested but no CUDA install was
                      detected, the required GPU package could not be
                      imported, or the number of device ids does not match
                      the requested number of GPUs.
        ValueError: if the 'nervanagpu<N>' suffix is not an integer, or if
                    gpu names a backend this function does not know about.

    Notes:
        * CUDA availability is probed by running ``nvcc --version`` on Linux
          and ``kextstat | grep -i cuda`` on OS X; on any other platform GPU
          requests are always rejected.
        * NOTE(review): earlier documentation stated that the returned
          backend still needs par.init_model() called once the model has
          been linked for parallel training -- nothing in this function
          shows that; confirm against the backend classes.
    """
    logger = logging.getLogger(__name__)
    gpuflag = False
    # Tracked separately from the `nrv` argument so that a caller passing
    # both gpu=... and nrv=True never reaches the (unimported) NRVBackend.
    use_nrv = False

    if gpu is not None:
        gpu = gpu.lower()
        if sys.platform.startswith("linux"):
            gpuflag = (os.system("nvcc --version > /dev/null 2>&1") == 0)
        elif sys.platform.startswith("darwin"):
            gpuflag = (
                os.system("kextstat | grep -i cuda > /dev/null 2>&1") == 0)
        if gpuflag and gpu == 'cudanet':
            try:
                import cudanet  # noqa
                from neon.backends.cc2 import GPU
                be_name = 'Cudanet'
                be = GPU(rng_seed=rng_seed, device_id=device_id)
            except ImportError:
                raise RuntimeError("cudanet not found, can't run via GPU")
        elif gpuflag and gpu.startswith('nervanagpu'):
            try:
                import nervanagpu  # noqa
                try:
                    be_name = 'NervanaGPU'
                    if gpu == 'nervanagpu':
                        from neon.backends.gpu import GPU
                        be = GPU(rng_seed=rng_seed,
                                 stochastic_round=stochastic_round,
                                 device_id=_first_device_id(device_id))
                    else:
                        from neon.backends.mgpu import MGPU
                        num_dev = _parse_num_dev(gpu)
                        be = MGPU(rng_seed=rng_seed,
                                  stochastic_round=stochastic_round,
                                  device_id=_device_id_list(device_id,
                                                            num_dev),
                                  num_dev=num_dev)
                except ImportError:
                    logger.warning("pycuda error, can't run via GPU")
                    gpuflag = False
            except ImportError:
                logger.warning("nervanagpu not found, can't run via GPU")
                gpuflag = False
        elif gpuflag:
            # Without this branch an unknown name fell through with no
            # backend constructed and crashed later on an unbound local.
            raise ValueError("unsupported gpu backend requested: %r" % gpu)
        if gpuflag is False:
            raise RuntimeError("Can't find CUDA capable GPU")
    elif nrv:
        try:
            from umd.nrv_backend import NRVBackend
            use_nrv = True
        except ImportError:
            logger.warning("Nervana Engine system software not found")

    if flexpoint:
        logger.warning("Flexpoint(TM) backend not currently available")

    if use_nrv:
        be_name = 'NRV'
        be = NRVBackend(rng_seed=rng_seed,
                        seterr_handling=numerr_handling,
                        device_id=device_id)
    elif not gpuflag:
        be_name = 'CPU'
        be = CPU(rng_seed=rng_seed, seterr_handling=numerr_handling)
    logger.info("%s backend, RNG seed: %s, numerr: %s",
                be_name, rng_seed, numerr_handling)

    return be


def _parse_num_dev(gpu):
    """Return the device count N encoded in a 'nervanagpu<N>' string."""
    # Slice the prefix off rather than str.strip(): strip() removes a *set*
    # of characters from both ends, so e.g. 'nervanagpu2a' parsed as 2.
    suffix = gpu[len('nervanagpu'):]
    try:
        return int(suffix)
    except ValueError:
        raise ValueError("invalid number of GPUs specified")


def _first_device_id(device_id):
    """Return the single device id to use given None, a number or a sequence."""
    if device_id is None:
        return 0
    try:
        return device_id[0]
    except TypeError:
        # a bare number was supplied instead of a sequence of ids
        return device_id


def _device_id_list(device_id, num_dev):
    """Return the ids for num_dev devices, defaulting to range(num_dev)."""
    if not device_id:
        device_id = range(num_dev)
    elif not hasattr(device_id, '__len__'):
        # a bare number was supplied; treat it as a one-element list so the
        # count check below reports the mismatch instead of a TypeError
        device_id = [device_id]
    if len(device_id) != num_dev:
        raise RuntimeError(
            "Incorrect number of devices specified: device_id=%s, "
            "num_dev=%d" % (list(device_id), num_dev))
    return device_id