def setup(self):
    """Prepare the shared fixtures: backend, inputs, RBM layer and cost.

    Creates a seeded cuda-convnet2 GPU backend, a (2, 100) tensor of ones
    used as fake input data, an ``RBMLayer`` named 'testlayer' and a
    ``SumSquaredDiffs`` cost bound to that layer, then initializes the
    layer with a gradient-descent learning rule and a logistic activation.
    """
    from neon.backends.cc2 import GPU, GPUTensor

    # TODO: remove randomness from expected target results
    self.be = GPU(rng_seed=0)

    # reusable fake data
    self.inputs = GPUTensor(np.ones((2, 100)))

    # fake layer settings
    num_inputs = 2
    layer_conf = {
        'name': 'testlayer',
        'num_nodes': 2,
        'weight_init': GaussianValGen(backend=self.be, loc=0.0,
                                      scale=0.01),
    }
    learning_rule = {
        'type': 'gradient_descent',
        'lr_params': {'learning_rate': 0.01},
    }
    logistic = Logistic()
    self.layer = RBMLayer(name=layer_conf['name'])

    # fake cost attached to the layer under test
    self.cost = SumSquaredDiffs(olayer=self.layer)

    init_args = {
        'backend': self.be,
        'batch_size': 100,
        'lrule_init': learning_rule,
        'nin': num_inputs,
        'nout': layer_conf['num_nodes'],
        'activation': logistic,
        'weight_init': layer_conf['weight_init'],
    }
    self.layer.initialize(init_args)
def test_xcov_derivative_cc2tensor():
    """Compare xcov_cost_derivative on the cc2 backend with a numpy reference.

    The reference gradient is assembled from the cross term returned by
    ``xcc`` and the row-centered halves of a seeded random matrix.
    """
    from neon.backends.cc2 import GPU, GPUTensor

    be = GPU(rng_seed=0)
    np.random.seed(0)
    num_cols = 10
    num_rows = 8
    k1, k2 = 3, 5
    data = np.array(np.random.randn(num_rows, num_cols), dtype='float32',
                    order='C')
    reference = np.zeros_like(data)

    cross = xcc(data[:k1], data[k1:])  # k1 x k2
    lower_centered = data[k1:] - data[k1:].mean(1, keepdims=True)  # k2 x n
    upper_centered = data[:k1] - data[:k1].mean(1, keepdims=True)  # k1 x n
    reference[:k1] = cross.dot(lower_centered) / num_cols
    reference[k1:] = cross.T.dot(upper_centered) / num_cols

    outputs = GPUTensor(data.copy())
    # scratch buffers handed to the cost derivative, in the order it expects
    scratch_shapes = ((k1, num_cols), (k2, num_cols), (k1, k2),
                      outputs.shape)
    temp = [be.empty(shape) for shape in scratch_shapes]

    actual = xcov_cost_derivative(be, outputs, [], temp, k1)
    assert_tensor_near_equal(GPUTensor(reference), actual)
def test_cc2_rectleaky_derivative_slope_zero_rectlin_equiv():
    """A leaky-rectifier derivative with slope 0 must equal the rectlin one."""
    from neon.backends.cc2 import GPU

    backend = GPU()
    samples = backend.uniform(low=-5.0, high=10.0, size=(10, 10))
    rectlin_out = backend.empty(samples.shape)
    leaky_out = backend.empty(samples.shape)

    backend.rectlin_derivative(samples, out=rectlin_out)
    backend.rectleaky_derivative(samples, slope=0.0, out=leaky_out)

    assert_tensor_equal(rectlin_out, leaky_out)
def test_logistic_cc2tensor():
    """Check Logistic.apply_function on the cc2 backend against numpy."""
    activation = Logistic()
    from neon.backends.cc2 import GPU, GPUTensor

    raw = np.array([0, 1, -2]).reshape((3, 1))
    expected = 1.0 / (1.0 + np.exp(-raw))

    backend = GPU(rng_seed=0)
    result = backend.zeros((3, 1))
    activation.apply_function(backend, GPUTensor(raw), result)

    assert_tensor_near_equal(GPUTensor(expected), result)
def test_softmax_cc2tensor():
    """Check Softmax.apply_function on the cc2 backend against numpy."""
    activation = Softmax()
    from neon.backends.cc2 import GPU, GPUTensor

    raw = np.array([0, 1, -2]).reshape((3, 1))
    exponentials = np.exp(raw)
    expected = exponentials / np.sum(exponentials)

    backend = GPU(rng_seed=0)
    result = backend.zeros((3, 1))
    activation.apply_function(backend, GPUTensor(raw), result)

    assert_tensor_near_equal(GPUTensor(expected), result)
def test_tanh_cc2tensor():
    """Check Tanh.apply_function on the cc2 backend against true_tanh."""
    activation = Tanh()
    from neon.backends.cc2 import GPU, GPUTensor

    raw = np.array([0, 1, -2]).reshape((3, 1))
    # the expected tensor is built before the backend, as in the original
    expected = GPUTensor([true_tanh(value) for value in (0, 1, -2)])

    backend = GPU(rng_seed=0)
    result = backend.zeros((3, 1))
    activation.apply_function(backend, GPUTensor(raw), result)

    assert_tensor_near_equal(expected, result)
def test_cross_entropy_derivative_cc2tensor():
    """Compare cross_entropy_derivative on cc2 with (o - t) / (o * (1 - o))."""
    from neon.backends.cc2 import GPU, GPUTensor

    be = GPU(rng_seed=0)
    outputs = GPUTensor([0.5, 0.9, 0.1, 0.0001])
    targets = GPUTensor([0.5, 0.99, 0.01, 0.2])
    temp = [be.zeros(outputs.shape) for _ in range(2)]

    # host-side copies used to build the reference value
    out_host = outputs.asnumpyarray()
    tgt_host = targets.asnumpyarray()
    expected_result = (out_host - tgt_host) / (out_host * (1 - out_host))

    actual = cross_entropy_derivative(be, outputs, targets, temp)
    assert_tensor_near_equal(expected_result, actual)
def test_tanh_derivative_cc2tensor():
    """Check Tanh.apply_derivative on cc2 against 1 - tanh(x)**2."""
    activation = Tanh()
    from neon.backends.cc2 import GPU, GPUTensor

    raw = np.array([0, 1, -2], dtype='float32').reshape((3, 1))
    backend = GPU(rng_seed=0)
    expected = GPUTensor([1 - true_tanh(value)**2 for value in (0, 1, -2)])

    result = backend.zeros(raw.shape)
    activation.apply_derivative(backend, GPUTensor(raw, dtype='float32'),
                                result)

    assert_tensor_near_equal(expected, result, tolerance=1e-5)
def compare_cc2_tensors(inputs, outputs, deriv=False):
    """Assert that RectLeaky on the cc2 backend maps inputs to outputs.

    Arguments:
        inputs: host array fed to the activation (wrapped in a GPUTensor).
        outputs: host array holding the expected values.
        deriv: when exactly ``True`` the derivative is applied, otherwise
               the activation function itself.
    """
    from neon.backends.cc2 import GPU, GPUTensor

    activation = RectLeaky()
    backend = GPU()
    result = backend.zeros(inputs.shape)

    # identity check on True is kept on purpose (truthy non-bool values
    # select the plain function)
    transform = (activation.apply_derivative if deriv is True
                 else activation.apply_function)
    transform(backend, GPUTensor(inputs), result)

    # the difference with the expected values must be exactly zero
    backend.subtract(result, GPUTensor(outputs), result)
    assert_tensor_equal(result, backend.zeros(inputs.shape))
def test_softmax_derivative_cc2tensor():
    """Check Softmax.apply_derivative on cc2 against a numpy reference."""
    activation = Softmax()
    from neon.backends.cc2 import GPU, GPUTensor

    raw = np.array([0, 1, -2]).reshape((3, 1))
    exponentials = np.exp(raw)
    probabilities = exponentials / np.sum(exponentials)
    errmat = np.ones(raw.shape)
    weighted = np.einsum('ij,ji->i', errmat.T, probabilities)
    expected = probabilities * (errmat - weighted[np.newaxis, :])

    backend = GPU(rng_seed=0)
    result = backend.zeros(raw.shape)
    activation.apply_derivative(backend, GPUTensor(raw), result)

    assert_tensor_near_equal(GPUTensor(expected), result)
def test_cross_entropy_cc2tensor():
    """Compare cross_entropy on the cc2 backend with a numpy reference."""
    from neon.backends.cc2 import GPU, GPUTensor

    be = GPU(rng_seed=0)  # to ensure cublas_init() is called.
    outputs = GPUTensor([0.5, 0.9, 0.1, 0.0001])
    targets = GPUTensor([0.5, 0.99, 0.01, 0.2])
    temp = [be.zeros(outputs.shape) for _ in range(2)]

    # host-side copies used to build the reference value
    out_host = outputs.asnumpyarray()
    tgt_host = targets.asnumpyarray()
    per_element = ((-tgt_host) * np.log(out_host) -
                   (1 - tgt_host) * np.log(1 - out_host))
    expected_result = np.sum(per_element, keepdims=True)

    actual = cross_entropy(be, outputs, targets, temp)
    assert_tensor_near_equal(expected_result, actual, tolerance=1e-6)
def test_xcov_cc2tensor():
    """Compare xcov_cost on the cc2 backend with 0.5 * sum(xcc ** 2)."""
    np.random.seed(0)
    num_cols = 10
    num_rows = 8
    k1, k2 = 3, 5
    data = np.array(np.random.randn(num_rows, num_cols) * 10,
                    dtype='float32', order='C')
    cross = xcc(data[:k1], data[k1:])
    expected_result = 0.5 * (cross**2.).sum()

    from neon.backends.cc2 import GPU, GPUTensor
    be = GPU(rng_seed=0)  # to ensure cublas_init() is called.
    outputs = GPUTensor(data.copy())

    # scratch buffers handed to the cost function, in the order it expects
    scratch_shapes = ((k1, num_cols), (k2, num_cols), (k1, k2),
                      outputs.shape)
    temp = [be.empty(shape) for shape in scratch_shapes]

    actual = xcov_cost(be, outputs, [], temp, k1)
    assert_tensor_near_equal(expected_result, actual, tolerance=1e-3)
def gen_backend(model=None, gpu=None, nrv=False, datapar=False,
                modelpar=False, flexpoint=False, rng_seed=None,
                numerr_handling=None, half=False, stochastic_round=0,
                device_id=None):
    """
    Construct and return a backend instance of the appropriate type based on
    the arguments given.  With no parameters, a single CPU core, float32
    backend is returned.

    Arguments:
        model (neon.models.model.Model): The instantiated model upon which we
                                         will utilize this backend.  Not
                                         referenced by this function.
        gpu (string, optional): Attempt to utilize a CUDA capable GPU if
                                installed in the system.  Defaults to None
                                which implies a CPU based backend.  If
                                'cudanet', utilize a cuda-convnet2 based
                                backend, which supports Kepler and Maxwell
                                GPUs with single precision.  If 'nervanagpu',
                                attempt to utilize the NervanaGPU Maxwell
                                backend with float16 and float32 support.
                                The value is compared case-insensitively and
                                takes precedence over nrv.
        nrv (bool, optional): If True, attempt to utilize the Nervana Engine
                              for computation (must be installed on the
                              system).  Defaults to False which implies a CPU
                              based backend.  Only consulted when gpu is
                              None; if the engine software cannot be imported
                              a warning is logged and the CPU backend is
                              used.
        datapar (bool, optional): Set to True to ensure that data is
                                  partitioned and each chunk is processed in
                                  parallel on different compute cores.
                                  Requires mpi4py.  Defaults to False which
                                  implies that all data will be processed
                                  sequentially on a single compute core.
        modelpar (bool, optional): Set to True to ensure that the nodes in
                                   each model layer are partitioned and
                                   distributed across multiple compute
                                   cores.  Requires mpi4py.  Defaults to
                                   False which implies that all nodes in all
                                   model layers will be processed by the same
                                   single compute core.
        flexpoint (bool, optional): If True, a warning is logged that the
                                    FlexPoint(TM) backend is not currently
                                    available; the backend choice is
                                    otherwise unaffected.
        rng_seed (numeric, optional): Set this to a numeric value which can
                                      be used to seed the random number
                                      generator of the instantiated backend.
                                      Defaults to None, which doesn't
                                      explicitly seed (so each run will be
                                      different)
        numerr_handling (dict, optional): Dictate how numeric errors are
                                          displayed and handled.  The keys
                                          and values permissible for this
                                          dict match that seen in
                                          numpy.seterr.  If set to None (the
                                          default), behavior is equivalent to
                                          {'all': 'warn'}.  Forwarded to the
                                          CPU and NRV backends only.
        half (bool, optional): Not referenced by this function.
        stochastic_round (numeric, optional): Only forwarded to the
                                              NervanaGPU backend.  If 1,
                                              perform stochastic rounding.
                                              If 0, round to nearest.
        device_id (numeric, optional): Set this to a numeric value which can
                                       be used to select which device to run
                                       the process on.  Overridden (with a
                                       logged warning) when the
                                       parallelization object provides its
                                       own device_id.

    Returns:
        Backend: newly constructed backend instance of the specified type.

    Raises:
        NotImplementedError: if both datapar and modelpar are requested.
        RuntimeError: if gpu is given but no CUDA capable GPU is detected or
                      the requested GPU package cannot be imported.
        ValueError: if a CUDA capable GPU is detected but gpu names a
                    backend other than 'cudanet' or 'nervanagpu'.

    Notes:
        * Attempts to construct a GPU instance without a CUDA capable card or
          without cudanet or nervanagpu package installed will log a warning
          (for the missing package) and raise RuntimeError.
        * The 'nervanagpu' path creates a pycuda context, stores it in the
          module-level name ``ctx`` and registers ``ctx.pop`` with atexit.
        * The parallelization object is associated with the returned backend
          via par.associate(be).  The returned backend will still need to
          call its par.init_model() at some point after the model has been
          linked, in order for parallel training to proceed.
    """
    global ctx
    logger = logging.getLogger(__name__)
    gpuflag = False
    # Tracks whether the NRV backend class was actually imported, so the
    # final selection never touches NRVBackend when it is not in scope (for
    # example when both gpu and nrv are supplied).
    use_nrv = False

    if datapar and modelpar:
        raise NotImplementedError('Hybrid parallelization scheme not '
                                  'implemented yet. Try with at most one of '
                                  'datapar or modelpar')
    if modelpar:
        par = ModelPar()
    elif datapar:
        par = DataPar()
    else:
        par = NoPar()

    if par.device_id is not None:
        if device_id is not None:
            logger.warning('Ignoring device id specified in command line.')
        device_id = par.device_id

    if gpu is not None:
        gpu = gpu.lower()
        # Probe for a CUDA installation; other platforms leave gpuflag False.
        if sys.platform.startswith("linux"):
            gpuflag = (os.system("nvidia-smi > /dev/null 2>&1") == 0)
        elif sys.platform.startswith("darwin"):
            gpuflag = (
                os.system("kextstat | grep -i cuda > /dev/null 2>&1") == 0)
        if gpuflag and gpu == 'cudanet':
            try:
                import cudanet  # noqa
                from neon.backends.cc2 import GPU
                be_name = 'Cudanet'
                be = GPU(rng_seed=rng_seed, device_id=device_id)
            except ImportError:
                logger.warning("cudanet not found, can't run via GPU")
                gpuflag = False
        elif gpuflag and gpu == 'nervanagpu':
            try:
                import nervanagpu  # noqa
                try:
                    # import pycuda.autoinit
                    import pycuda.driver as drv
                    drv.init()
                    device_id = device_id if device_id is not None else 0
                    ctx = drv.Device(device_id).make_context()
                    import atexit
                    atexit.register(ctx.pop)
                    from neon.backends.gpu import GPU
                    be_name = 'NervanaGPU'
                    be = GPU(rng_seed=rng_seed,
                             stochastic_round=stochastic_round,
                             device_id=device_id)
                except ImportError:
                    logger.warning("pycuda error, can't run via GPU")
                    gpuflag = False
            except ImportError:
                logger.warning("nervanagpu not found, can't run via GPU")
                gpuflag = False
        elif gpuflag:
            # A GPU is present but the name matches no known backend; fail
            # clearly instead of hitting an unbound local further down.
            raise ValueError("Unsupported GPU backend '{}': expected "
                             "'cudanet' or 'nervanagpu'".format(gpu))
        if gpuflag is False:
            raise RuntimeError("Can't find CUDA capable GPU")
    elif nrv:
        try:
            from umd.nrv_backend import NRVBackend
            use_nrv = True
        except ImportError:
            logger.warning("Nervana Engine system software not found")

    if flexpoint:
        logger.warning("Flexpoint(TM) backend not currently available")

    if use_nrv:
        be_name = 'NRV'
        be = NRVBackend(rng_seed=rng_seed, seterr_handling=numerr_handling,
                        device_id=device_id)
    elif not gpuflag:
        be_name = 'CPU'
        be = CPU(rng_seed=rng_seed, seterr_handling=numerr_handling)
    logger.info("{} backend, RNG seed: {}, numerr: {}".format(
        be_name, rng_seed, numerr_handling))

    par.associate(be)
    return be
def gen_backend(model=None, gpu=None, nrv=False, flexpoint=False,
                rng_seed=None, numerr_handling=None, half=False,
                stochastic_round=0, device_id=None):
    """
    Construct and return a backend instance of the appropriate type based on
    the arguments given.  With no parameters, a single CPU core, float32
    backend is returned.

    Arguments:
        model (neon.models.model.Model): The instantiated model upon which we
                                         will utilize this backend.  Not
                                         referenced by this function.
        gpu (string, optional): Attempt to utilize a CUDA capable GPU if
                                installed in the system.  Defaults to None
                                which implies a CPU based backend.  If
                                'cudanet', utilize a cuda-convnet2 based
                                backend, which supports Kepler and Maxwell
                                GPUs with single precision.  If 'nervanagpu',
                                attempt to utilize the NervanaGPU Maxwell
                                backend with float16 and float32 support.
                                If 'nervanagpu' followed by an integer N
                                (e.g. 'nervanagpu2'), the multi-GPU MGPU
                                backend is constructed with N devices.  The
                                value is compared case-insensitively and
                                takes precedence over nrv.
        nrv (bool, optional): If True, attempt to utilize the Nervana Engine
                              for computation (must be installed on the
                              system).  Defaults to False which implies a CPU
                              based backend.  Only consulted when gpu is
                              None; if the engine software cannot be imported
                              a warning is logged and the CPU backend is
                              used.
        flexpoint (bool, optional): If True, a warning is logged that the
                                    FlexPoint(TM) backend is not currently
                                    available; the backend choice is
                                    otherwise unaffected.
        rng_seed (numeric, optional): Set this to a numeric value which can
                                      be used to seed the random number
                                      generator of the instantiated backend.
                                      Defaults to None, which doesn't
                                      explicitly seed (so each run will be
                                      different)
        numerr_handling (dict, optional): Dictate how numeric errors are
                                          displayed and handled.  The keys
                                          and values permissible for this
                                          dict match that seen in
                                          numpy.seterr.  If set to None (the
                                          default), behavior is equivalent to
                                          {'all': 'warn'}.  Forwarded to the
                                          CPU and NRV backends only.
        half (bool, optional): Not referenced by this function.
        stochastic_round (numeric, optional): Only forwarded to the
                                              NervanaGPU and MGPU backends.
                                              If 1, perform stochastic
                                              rounding.  If 0, round to
                                              nearest.
        device_id (numeric or sequence, optional): Selects which device(s) to
                                       run the process on.  For 'nervanagpu'
                                       the first element of a sequence (or
                                       the scalar itself) is used, defaulting
                                       to 0.  For the multi-GPU form it must
                                       hold exactly N ids; when empty or None
                                       it defaults to 0..N-1.

    Returns:
        Backend: newly constructed backend instance of the specified type.

    Raises:
        RuntimeError: if gpu is given but no CUDA capable GPU is detected,
                      the requested GPU package cannot be imported, or the
                      number of device ids does not match the requested
                      number of GPUs.
        ValueError: if the multi-GPU suffix is not an integer, or a CUDA
                    capable GPU is detected but gpu names an unknown backend.

    Notes:
        * Attempts to construct a GPU instance without a CUDA capable card or
          without cudanet or nervanagpu package installed raise
          RuntimeError.
        * NOTE(review): earlier documentation mentioned mpi4py-based parallel
          instances and a later par.init_model() call; nothing in this
          function references them - confirm against callers.
    """
    logger = logging.getLogger(__name__)
    gpuflag = False
    # Tracks whether the NRV backend class was actually imported, so the
    # final selection never touches NRVBackend when it is not in scope (for
    # example when both gpu and nrv are supplied).
    use_nrv = False
    nervana_prefix = 'nervanagpu'

    if gpu is not None:
        gpu = gpu.lower()
        # Probe for a CUDA installation; other platforms leave gpuflag False.
        if sys.platform.startswith("linux"):
            gpuflag = (os.system("nvcc --version > /dev/null 2>&1") == 0)
        elif sys.platform.startswith("darwin"):
            gpuflag = (
                os.system("kextstat | grep -i cuda > /dev/null 2>&1") == 0)
        if gpuflag and gpu == 'cudanet':
            try:
                import cudanet  # noqa
                from neon.backends.cc2 import GPU
                be_name = 'Cudanet'
                be = GPU(rng_seed=rng_seed, device_id=device_id)
            except ImportError:
                raise RuntimeError("cudanet not found, can't run via GPU")
        elif gpuflag and gpu.startswith(nervana_prefix):
            try:
                import nervanagpu  # noqa
                try:
                    be_name = 'NervanaGPU'
                    if gpu == nervana_prefix:
                        if device_id is None:
                            device_id = 0
                        else:
                            try:
                                device_id = device_id[0]
                            except TypeError:
                                # already a scalar device id; use it as is
                                pass
                        from neon.backends.gpu import GPU
                        be = GPU(rng_seed=rng_seed,
                                 stochastic_round=stochastic_round,
                                 device_id=device_id)
                    else:
                        from neon.backends.mgpu import MGPU
                        try:
                            # Slice off the prefix; str.strip() would remove
                            # a character set from both ends instead.
                            num_dev = int(gpu[len(nervana_prefix):])
                        except ValueError:
                            raise ValueError("invalid number of GPUs" +
                                             " specified")
                        if not device_id:
                            device_id = list(range(num_dev))
                        if len(device_id) != num_dev:
                            raise RuntimeError(
                                "Incorrect number of devices specified: "
                                "device_id={!r}, expected {} "
                                "device(s)".format(device_id, num_dev))
                        be = MGPU(rng_seed=rng_seed,
                                  stochastic_round=stochastic_round,
                                  device_id=device_id,
                                  num_dev=num_dev)
                except ImportError:
                    logger.warning("pycuda error, can't run via GPU")
                    gpuflag = False
            except ImportError:
                logger.warning("nervanagpu not found, can't run via GPU")
                gpuflag = False
        elif gpuflag:
            # A GPU is present but the name matches no known backend; fail
            # clearly instead of hitting an unbound local further down.
            raise ValueError("Unsupported GPU backend '{}': expected "
                             "'cudanet', 'nervanagpu' or "
                             "'nervanagpu<N>'".format(gpu))
        if gpuflag is False:
            raise RuntimeError("Can't find CUDA capable GPU")
    elif nrv:
        try:
            from umd.nrv_backend import NRVBackend
            use_nrv = True
        except ImportError:
            logger.warning("Nervana Engine system software not found")

    if flexpoint:
        logger.warning("Flexpoint(TM) backend not currently available")

    if use_nrv:
        be_name = 'NRV'
        be = NRVBackend(rng_seed=rng_seed, seterr_handling=numerr_handling,
                        device_id=device_id)
    elif not gpuflag:
        be_name = 'CPU'
        be = CPU(rng_seed=rng_seed, seterr_handling=numerr_handling)
    logger.info("{} backend, RNG seed: {}, numerr: {}".format(
        be_name, rng_seed, numerr_handling))
    return be
def setup(self):
    """Create the per-test fixtures; this runs prior to each test.

    Stores a seeded cuda-convnet2 GPU backend on ``self.be`` and the
    ``GPUTensor`` class itself (not an instance) on ``self.gpt``.
    """
    from neon.backends import cc2

    self.be = cc2.GPU(rng_seed=0)
    self.gpt = cc2.GPUTensor
def test_gpu_bprop(self):
    """Run the shared bprop check on a layer built for the cc2 GPU backend."""
    from neon.backends.cc2 import GPU

    gpu_backend = GPU(rng_seed=0)
    check_bprop(self.create_layer(backend=gpu_backend), gpu_backend)