def train_ensemble(prototype_net, dataset, outfile=None, n_nets=10, use_gpu=True):
    """Train ``n_nets`` networks on ``dataset`` and return them as a list.

    All networks are clones of the given prototype: the prototype is reset
    and re-fitted once per ensemble member, and a deep copy of the fitted,
    CPU-side net is collected after each run.

    Parameters
    ----------
    prototype_net: the net to train; must offer ``reset()`` and ``fit()``.
    dataset: sequence that unpacks into (X, y, Xvalid, yvalid).
    outfile: optional file name. When given, every trained net is pickled
        into this file, one pickle record after the other.
    n_nets: number of networks to train.
    use_gpu: if True, data and net are moved to the GPU for training
        (GPU 0 is initialized when CUDA has not been set up yet).

    Returns
    -------
    List with one trained (CPU) network per ensemble member.
    """
    from binet import op

    if use_gpu:
        gc.collect()
        if not op._IS_CUDA_INITIALIZED:
            logger = logging.getLogger(__name__)
            # Logger.warn() is a deprecated alias; warning() is the
            # supported spelling.
            logger.warning("CUDA not initialized, initializing GPU 0")
            op.init_gpu(0)
        X, y, Xvalid, yvalid = [op.to_gpu(d) for d in dataset]
        prototype_net = op.to_gpu(prototype_net)
    else:
        X, y, Xvalid, yvalid = dataset

    # Keep an explicit handle (or None) so the cleanup below never touches
    # an unbound name.
    f = open(outfile, "wb") if outfile is not None else None
    nets = []
    try:
        for _ in range(n_nets):
            prototype_net.reset()
            if use_gpu:
                # the previous iteration left the net on the CPU
                prototype_net = op.to_gpu(prototype_net)
            prototype_net.fit(X, y, Xvalid, yvalid)
            prototype_net = op.to_cpu(prototype_net)
            nets.append(copy.deepcopy(prototype_net))
            if f is not None:
                pickle.dump(prototype_net, f, -1)
    finally:
        if f is not None:
            f.close()
    return nets
def run_function(X, Y_expected, func, rtol=1e-6, with_inplace_test=True, **kwargs):
    """Run ``func`` on CPU and GPU input and compare against ``Y_expected``.

    Each device is exercised with an explicit ``out`` buffer, without one,
    and (when ``with_inplace_test`` is set) with the input used as output.
    Extra keyword arguments are forwarded to ``func``; they are passed
    through ``op.to_gpu`` before the GPU calls.
    """
    def _matches(result, label):
        assert_allclose(Y_expected, result, err_msg=label, rtol=rtol)

    # host: explicit output buffer
    host_out = np.empty_like(Y_expected)
    returned = func(X, out=host_out, **kwargs)
    _matches(returned, "CPU with target")
    assert returned is host_out

    # host: let func allocate
    _matches(func(X, **kwargs), "CPU, no target")

    if with_inplace_test:
        scratch = X.copy()
        returned = func(scratch, out=scratch, **kwargs)
        _matches(returned, "CPU, inplace target")
        assert returned is scratch

    kwargs = op.to_gpu(kwargs)

    # device: explicit output buffer
    dev_in = op.to_gpu(X)
    dev_out = gpuarray.empty_like(op.to_gpu(Y_expected))
    returned = func(dev_in, out=dev_out, **kwargs)
    _matches(op.to_cpu(returned), "GPU with target")
    assert returned is dev_out

    # device: let func allocate
    returned = func(dev_in, **kwargs)
    _matches(op.to_cpu(returned), "GPU, no target")

    if with_inplace_test:
        returned = func(dev_in, out=dev_in, **kwargs)
        _matches(op.to_cpu(returned), "GPU, inplace target")
        assert returned is dev_in
def test_add_vec():
    """Check op.add_vec against x + alpha*y and beta*x + alpha*y (CPU, GPU)."""
    tol = 1e-4
    scale = 2.5
    x = 5.0 * np.random.randn(10).astype(np.float32)
    y = 10.0 * np.random.randn(10).astype(np.float32)
    x_start = x.copy()

    # first pass: no beta argument
    want = x + scale * y
    op.add_vec(x, scale, y)
    assert_allclose(want, x, err_msg="CPU", rtol=tol)

    x_dev = op.to_gpu(x_start)
    y_dev = op.to_gpu(y)
    op.add_vec(x_dev, scale, y_dev)
    assert_allclose(want, op.to_cpu(x_dev), err_msg="GPU", rtol=tol)

    # second pass: x is additionally scaled by beta
    damp = 0.5
    x = x_start.copy()
    want = damp * x + scale * y
    op.add_vec(x, scale, y, damp)
    assert_allclose(want, x, err_msg="CPU", rtol=tol)

    x_dev = op.to_gpu(x_start)
    y_dev = op.to_gpu(y)
    op.add_vec(x_dev, scale, y_dev, damp)
    assert_allclose(want, op.to_cpu(x_dev), err_msg="GPU", rtol=tol)
def test_add_vec():
    """Check op.add_vec with and without the optional beta argument."""
    tol = 1e-4
    alpha = 2.5
    x = 5.0 * np.random.randn(10).astype(np.float32)
    y = 10.0 * np.random.randn(10).astype(np.float32)
    x_orig = x.copy()

    def _run(target, vec, *extra):
        # add_vec is expected to update ``target`` in place
        op.add_vec(target, alpha, vec, *extra)
        return target

    # plain update
    reference = x + alpha * y
    assert_allclose(reference, _run(x, y), err_msg="CPU", rtol=tol)
    on_gpu = _run(op.to_gpu(x_orig), op.to_gpu(y))
    assert_allclose(reference, op.to_cpu(on_gpu), err_msg="GPU", rtol=tol)

    # update with beta
    beta = 0.5
    x = x_orig.copy()
    reference = beta * x + alpha * y
    assert_allclose(reference, _run(x, y, beta), err_msg="CPU", rtol=tol)
    on_gpu = _run(op.to_gpu(x_orig), op.to_gpu(y), beta)
    assert_allclose(reference, op.to_cpu(on_gpu), err_msg="GPU", rtol=tol)
def test_togpu():
    """to_gpu yields a GPUArray of equal shape and passes GPU data through."""
    host = np.random.randn(3, 5)
    dev = op.to_gpu(host)
    assert type(dev) == gpuarray.GPUArray
    assert dev.shape == host.shape

    # transferring something that already lives on the GPU must be a no-op
    again = op.to_gpu(dev)
    assert again is dev
def train(net, dataset, fname=None, skip_output=25, show_plots=False, use_gpu=True, **kwargs):
    """Train a neural network on the given dataset and return it (on the CPU).

    The initial loss is recorded via ``net.track_progress`` before
    ``net.fit`` is started. A ``KeyboardInterrupt`` during training is
    intercepted: the current status is printed and the partially trained
    net is still moved to the CPU, optionally pickled, and returned.

    Parameters
    ----------
    net: the neural net.
    dataset: sequence that unpacks into (trainx, trainy, validx, validy).
    fname: file name in which to store the (pickled) network after
        training. The file is stored in the 'data' subfolder of the CWD,
        which is created if it does not exist.
    skip_output: forwarded to ``net.fit``; how many lines of output to skip
        between two lines that will actually be printed.
    show_plots: if True, plot ``net.weights[0]`` with ``plot_images`` and
        the learning curves with ``plot_learning_curves``.
    use_gpu: if True, move data and net to the GPU for training (GPU 0 is
        initialized when CUDA has not been set up yet).
    **kwargs: additional parameters for the ``plot_images`` and
        ``plot_learning_curves`` calls made when ``show_plots=True``.

    Returns
    -------
    The trained network, moved back to the CPU.
    """
    from binet import op

    if use_gpu:
        gc.collect()
        if not op._IS_CUDA_INITIALIZED:
            logger = logging.getLogger(__name__)
            # Logger.warn() is a deprecated alias; warning() is the
            # supported spelling.
            logger.warning("CUDA not initialized, initializing GPU 0")
            op.init_gpu(0)
        X, y, Xvalid, yvalid = [op.to_gpu(d) for d in dataset]
        net = op.to_gpu(net)
    else:
        X, y, Xvalid, yvalid = dataset

    try:
        init_out = net.transform(X)
        init_err = net._get_loss(y, init_out)
        net.track_progress(time.time(), init_err, X, y, Xvalid, yvalid)
        net.fit(X, y, Xvalid, yvalid, skip_output=skip_output)
    except KeyboardInterrupt:
        print("Intercepted KeyboardInterrupt, stopping... \ncurrent status:")
        net.track_progress(time.time(), -1, X, y, Xvalid, yvalid)
        net.statistics = net.statistics[:-1]  # we just added an invalid point
    finally:
        # Always bring the net back and persist it, even when training
        # stopped early.
        net = op.to_cpu(net)
        if fname:
            if not os.path.exists("data"):
                warnings.warn("creating 'data' directory to store pickled net")
                os.mkdir("data")
            with open(os.path.join("data", fname), "wb") as f:
                pickle.dump(net, f, -1)

    if show_plots:
        plot_images(net.weights[0], 16, 16, **kwargs)
        plot_learning_curves(net, **kwargs)
    return net
def test_dsigmoid_delta():
    """op.dsigmoid_delta must turn D into D * A * (1 - A) in place."""
    tol = 1e-5
    x_in = np.random.randn(3, 5).astype(np.float32)
    act = 5 * np.random.randn(30, 50).astype(np.float32)
    delta = 5 * np.random.randn(30, 50).astype(np.float32)
    want = delta * act * (1 - act)

    # upload the untouched delta before the CPU call below overwrites it
    delta_dev = op.to_gpu(delta)

    op.dsigmoid_delta(delta, act, x_in)
    assert_allclose(want, delta, rtol=tol, err_msg="CPU")

    act_dev = op.to_gpu(act)
    x_dev = op.to_gpu(x_in)
    op.dsigmoid_delta(delta_dev, act_dev, x_dev)
    assert_allclose(want, op.to_cpu(delta_dev), rtol=tol, err_msg="GPU")
def test_dsigmoid_delta():
    """Compare op.dsigmoid_delta (CPU and GPU) with D * A * (1 - A)."""
    shape = (30, 50)
    X = np.random.randn(3, 5).astype(np.float32)
    A = 5 * np.random.randn(*shape).astype(np.float32)
    D = 5 * np.random.randn(*shape).astype(np.float32)
    reference = D * A * (1 - A)

    # the GPU copy has to be taken while D still holds the raw deltas
    gpu_delta = op.to_gpu(D)

    # CPU: result is written back into D
    op.dsigmoid_delta(D, A, X)
    assert_allclose(reference, D, rtol=1e-5, err_msg="CPU")

    # GPU: result is written back into gpu_delta
    gpu_act = op.to_gpu(A)
    gpu_x = op.to_gpu(X)
    op.dsigmoid_delta(gpu_delta, gpu_act, gpu_x)
    assert_allclose(reference, op.to_cpu(gpu_delta), rtol=1e-5, err_msg="GPU")
def test_toplayer_delta():
    """op.toplayer_delta(A, D, X) must return A - D on CPU and GPU."""
    tol = 1e-5
    x_in = np.random.randn(3, 5).astype(np.float32)
    act = 5 * np.random.randn(30, 50).astype(np.float32)
    target = 5 * np.random.randn(30, 50).astype(np.float32)
    want = act - target.copy()

    # take the device copy before the CPU call gets a chance to touch it
    target_dev = op.to_gpu(target)

    got = op.toplayer_delta(act, target, x_in)
    assert_allclose(want, got, rtol=tol, err_msg="CPU")

    act_dev = op.to_gpu(act)
    x_dev = op.to_gpu(x_in)
    got_dev = op.toplayer_delta(act_dev, target_dev, x_dev)
    assert_allclose(want, op.to_cpu(got_dev), rtol=tol, err_msg="GPU")
def test_toplayer_delta():
    """Compare op.toplayer_delta with the reference A - D (CPU, then GPU)."""
    shape = (30, 50)
    X = np.random.randn(3, 5).astype(np.float32)
    A = 5 * np.random.randn(*shape).astype(np.float32)
    D = 5 * np.random.randn(*shape).astype(np.float32)

    # reference is built from a private copy of D
    reference = D.copy()
    reference = A - reference

    gpu_d = op.to_gpu(D)

    cpu_result = op.toplayer_delta(A, D, X)
    assert_allclose(reference, cpu_result, rtol=1e-5, err_msg="CPU")

    gpu_a = op.to_gpu(A)
    gpu_x = op.to_gpu(X)
    gpu_result = op.toplayer_delta(gpu_a, gpu_d, gpu_x)
    assert_allclose(reference, op.to_cpu(gpu_result), rtol=1e-5, err_msg="GPU")
def partial_fit(self, X, y, encode_labels=True):
    ''' Runs one epoch of minibatch-backprop on the given data.

    For every slice produced by ``generate_slices`` this runs a forward
    pass, a backward pass and one ``update`` per layer, then calls every
    registered minibatch callback. Afterwards ``current_epoch`` is
    incremented.

    Note: Input-Dropout might overwrite parts of X!
    Expects y in One-Hot format.

    Parameters
    ----------
    X: input data; either a scipy CSR matrix or an array whose
        ``flags.c_contiguous`` is set (asserted below).
    y: targets, sliced in lockstep with X.
    encode_labels: not used inside this method.

    Returns
    -------
    The mean of the per-minibatch losses (``err / nbatches``).

    NOTE(review): if no slice is generated, ``nbatches`` stays 0 and the
    final division raises ZeroDivisionError.
    '''
    if not sparse.isspmatrix_csr(X):
        assert (X.flags.c_contiguous)
    cur_lr, cur_momentum = self._get_current_learningrate(
        self.current_epoch)
    err = 0.0       # running sum of minibatch losses
    nbatches = 0
    for s in generate_slices(X.shape[0], self.batch_size, \
                             self.ignore_last_minibatch_if_smaller):
        Xtemp = X[s]
        ytemp = y[s]

        # Host-side CSR input with GPU-resident weights: upload this
        # minibatch. NOTE(review): the original comment said the fastest
        # option is to convert to dense on the GPU, but the todense() call
        # is commented out, so X stays a GPUCSRArray here.
        if sparse.isspmatrix_csr(X) and isinstance(self.layers[0].W, op.gpuarray.GPUArray):
            a = op.cuda_memory_pool.allocate
            #Xtemp = op.to_gpu(Xtemp.A, stream=op.streams[0])
            #ytemp = op.to_gpu(ytemp, stream=op.streams[1])
            Xtemp = op.GPUCSRArray(Xtemp, allocator=a, stream=op.streams[0])
            #Xtemp = Xtemp.todense(allocator=a, stream=op.streams[0])
            # the targets are uploaded on stream 1, densified first if sparse
            if sparse.isspmatrix_csr(ytemp):
                ytemp = op.to_gpu(ytemp.toarray(), stream=op.streams[1])
            else:
                ytemp = op.to_gpu(ytemp, stream=op.streams[1])

        out = self.forward_pass(Xtemp)
        # presumably waits for the target upload issued on stream 1 above
        op.streams[1].synchronize()
        self.backward_pass(out, ytemp, cur_momentum)
        # NOTE(review): stream index 2 is hard-coded; confirm which work
        # this is meant to wait for.
        op.streams[2].synchronize()
        # layer updates are spread round-robin over the available streams
        for i, l in enumerate(self.layers):
            l.update(cur_lr[i], stream=op.streams[i % len(op.streams)])
        self.update_count += 1
        batch_error = self._get_loss(ytemp, out)
        err += batch_error
        nbatches += 1
        for cb in self._minibatch_callbacks:
            cb(self, batch_error, Xtemp, ytemp)
    self.current_epoch += 1
    return err / nbatches
def test_gpusparseA_sgemm():
    """add_dot with a sparse (CSR) left operand: X = A.B + 0.5*C."""
    from scipy.sparse import csr_matrix

    dense_a = np.random.laplace(size=(5, 3)).astype(np.float32)
    dense_a[dense_a < 0.1] = 0          # sparsify
    sparse_a = csr_matrix(dense_a, dtype=np.float32)
    mat_b = np.random.normal(size=(3, 6)).astype(np.float32, order="c")
    out_shape = (sparse_a.shape[0], mat_b.shape[1])
    mat_c = np.ones(out_shape, dtype=np.float32, order='c')

    want = (sparse_a * mat_b) + 0.5 * mat_c

    a_dev = GPUCSRArray(sparse_a)
    b_dev = op.to_gpu(mat_b)
    c_dev = op.to_gpu(mat_c)
    got_dev = op.add_dot(a_dev, b_dev, c_dev, alpha=1.0, beta=0.5)
    assert_allclose(got_dev.get(), want, rtol=1e-4, err_msg="gpusparse_sgemm")
def test_add_matvec():
    """op.add_matvec adds a column vector along axis 1 or axis 0."""
    mat = np.random.randn(3, 4).astype(np.float32)
    per_col = np.random.randn(4, 1).astype(np.float32)
    per_row = np.random.randn(3, 1).astype(np.float32)
    want_axis1 = mat + per_col.T
    want_axis0 = mat + per_row

    # CPU
    assert_allclose(want_axis1, op.add_matvec(mat, per_col, 1))
    assert_allclose(want_axis0, op.add_matvec(mat, per_row, 0))

    # GPU: upload everything first, then run both variants
    mat_dev = op.to_gpu(mat)
    per_col_dev = op.to_gpu(per_col)
    per_row_dev = op.to_gpu(per_row)
    got_axis1 = op.add_matvec(mat_dev, per_col_dev, 1)
    assert_allclose(want_axis1, op.to_cpu(got_axis1))
    got_axis0 = op.add_matvec(mat_dev, per_row_dev, 0)
    assert_allclose(want_axis0, op.to_cpu(got_axis0))
def test_gpusparseB_sgemm_tb():
    """add_dot with a sparse right operand and transB: X = A.B^T + 0.5*C."""
    from scipy.sparse import csr_matrix

    dense_b = np.random.laplace(size=(3, 5)).astype(np.float32)
    dense_b[dense_b < 0.1] = 0          # sparsify
    sparse_b = csr_matrix(dense_b, dtype=np.float32)
    mat_a = np.random.normal(size=(4, 5)).astype(np.float32, order="c")
    out_shape = (mat_a.shape[0], sparse_b.shape[0])
    mat_c = np.ones(out_shape, dtype=np.float32, order='c')

    want = (mat_a * sparse_b.T) + 0.5 * mat_c

    b_dev = GPUCSRArray(sparse_b)
    a_dev = op.to_gpu(mat_a)
    c_dev = op.to_gpu(mat_c)
    got_dev = op.add_dot(a_dev, b_dev, c_dev, transB=True, alpha=1.0, beta=0.5)
    assert_allclose(got_dev.get(), want, rtol=1e-4, err_msg="gpusparse_sgemmB tb")
def tes_deactivate_t_gpusparseB_sgemm_ta_bug():
    """add_dot with transA and a sparse right operand: X = A^T.B + 0.5*C.

    The function name does not start with ``test_``, so test collectors
    that rely on that prefix will not pick it up.
    """
    from scipy.sparse import csr_matrix

    mat_a = np.random.normal(size=(6, 12)).astype(np.float32, order="c")
    dense_b = np.random.laplace(size=(6, 33)).astype(np.float32)
    dense_b[dense_b < 0.1] = 0          # sparsify
    sparse_b = csr_matrix(dense_b, dtype=np.float32)
    mat_c = np.ones((12, 33), dtype=np.float32, order='c')

    want = (mat_a.T * sparse_b) + 0.5 * mat_c

    b_dev = GPUCSRArray(sparse_b)
    a_dev = op.to_gpu(mat_a)
    c_dev = op.to_gpu(mat_c)
    got_dev = op.add_dot(a_dev, b_dev, c_dev, transA=True, alpha=1.0, beta=0.5)
    assert_allclose(got_dev.get(), want, rtol=1e-3, err_msg="gpusparse_sgemmB ta bug")
def test_gpusparseA_sgemm():
    """Sparse-times-dense add_dot on the GPU must match the scipy result."""
    from scipy.sparse import csr_matrix

    lhs = np.random.laplace(size=(5, 3)).astype(np.float32)
    lhs[lhs < 0.1] = 0                  # drop small entries to get zeros
    lhs = csr_matrix(lhs, dtype=np.float32)
    rhs = np.random.normal(size=(3, 6)).astype(np.float32, order="c")
    acc = np.ones((lhs.shape[0], rhs.shape[1]), dtype=np.float32, order='c')

    reference = (lhs * rhs) + 0.5 * acc

    result = op.add_dot(GPUCSRArray(lhs), op.to_gpu(rhs), op.to_gpu(acc),
                        alpha=1.0, beta=0.5)
    assert_allclose(result.get(), reference, rtol=1e-4,
                    err_msg="gpusparse_sgemm")
def test_gpusparseB_sgemm_tb():
    """Dense-times-sparse^T add_dot on the GPU must match the scipy result."""
    from scipy.sparse import csr_matrix

    rhs = np.random.laplace(size=(3, 5)).astype(np.float32)
    rhs[rhs < 0.1] = 0                  # drop small entries to get zeros
    rhs = csr_matrix(rhs, dtype=np.float32)
    lhs = np.random.normal(size=(4, 5)).astype(np.float32, order="c")
    acc = np.ones((lhs.shape[0], rhs.shape[0]), dtype=np.float32, order='c')

    reference = (lhs * rhs.T) + 0.5 * acc

    rhs_dev = GPUCSRArray(rhs)
    lhs_dev = op.to_gpu(lhs)
    acc_dev = op.to_gpu(acc)
    result = op.add_dot(lhs_dev, rhs_dev, acc_dev,
                        transB=True, alpha=1.0, beta=0.5)
    assert_allclose(result.get(), reference, rtol=1e-4,
                    err_msg="gpusparse_sgemmB tb")
def test_crossentropy():
    """op.cross_entropy must equal -sum(X * log(O)) / n_rows on CPU and GPU."""
    tol = 1e-4
    truth = np.random.rand(100, 10).astype(np.float32)
    pred = np.random.rand(100, 10).astype(np.float32)
    # normalize every row so that it sums to one
    truth /= truth.sum(1)[:, None]
    pred /= pred.sum(1)[:, None]
    want = -np.sum(truth * np.log(pred)) / truth.shape[0]

    got = op.cross_entropy(truth, pred)
    assert_allclose(want, got, err_msg="CPU, no target", rtol=tol)

    truth_dev = op.to_gpu(truth)
    pred_dev = op.to_gpu(pred)
    got_dev = op.cross_entropy(truth_dev, pred_dev)
    assert_allclose(want, op.to_cpu(got_dev), err_msg="GPU, no target", rtol=tol)
def test_gpucpu_fprop_equality():
    '''Forward propagation must give the same output on CPU and GPU.'''
    layer_sizes = [X.shape[1], 128, 32, y.shape[1]]
    host_net = NeuralNet(layer_sizes)
    # the device net starts from an exact copy of the host net
    dev_net = op.to_gpu(copy.deepcopy(host_net))

    host_out = host_net.forward_pass(X)
    dev_out = dev_net.forward_pass(Xd)
    assert_allclose(dev_out.get(), host_out, rtol=1e-5, err_msg="frop error")
def test_tognumpy_list():
    """A list sent through to_gpu and to_cpu gets its array back unchanged."""
    original = [np.random.randn(3, 5), "teststring"]
    round_trip = op.to_cpu(op.to_gpu(original))

    first = round_trip[0]
    assert type(first) == np.ndarray
    assert first.shape == original[0].shape
    assert_array_equal(first, original[0])
def test_randomly_replace_elements():
    """About a fraction p of the entries must become val, in place.

    The returned mask is expected to have mean 1 - p. Checked on CPU and
    GPU for several (val, p) combinations.
    """
    for val in (0.0, 0.5, 5):
        for p in (0.1, 0.2, 0.5, 0.75, 0.99):
            tag = "val: %.1f p: %.1f" % (val, p)
            host = np.random.normal(size=(1024, 2048)).astype(np.float32)
            # upload before the CPU call modifies ``host``
            dev = op.to_gpu(host)

            replaced, mask = op.randomly_replace_elements(host, p, val)
            assert replaced is host
            assert_almost_equal((host == val).mean(), p, decimal=2,
                                err_msg=tag)
            assert_almost_equal(mask.mean(), 1 - p, decimal=2,
                                err_msg="M " + tag)

            replaced_dev, mask_dev = op.randomly_replace_elements(dev, p, val)
            assert replaced_dev is dev
            hit_rate = op.to_cpu(op.mean(dev == val))
            assert_almost_equal(hit_rate, p, decimal=2,
                                err_msg=tag + " (gpu)")
            keep_rate = op.to_cpu(op.mean(mask_dev))
            assert_almost_equal(keep_rate, 1 - p, decimal=2,
                                err_msg="M " + tag + " (gpu)")
def run_function_with_axis(X, ax0_expected, ax1_expected, noax_expected, func, rtol=1e-6):
    """Check an axis-aware ``func`` on CPU and GPU input.

    ``func`` is called with ``axis=0``, with ``axis=1`` and, unless
    ``noax_expected`` is None, without an axis argument. Every result is
    compared with the matching expected value using relative tolerance
    ``rtol``; GPU results are moved to the CPU via ``op.to_cpu`` first.

    Parameters
    ----------
    X: host input handed to ``func`` (uploaded with ``op.to_gpu`` for the
        GPU half).
    ax0_expected, ax1_expected: expected results for axis 0 and axis 1.
    noax_expected: expected result without an axis, or None to skip it.
    func: callable accepting ``func(X)`` and ``func(X, axis=...)``.
    rtol: relative tolerance forwarded to ``assert_allclose``.
    """
    # CPU, no target argument
    ah0 = func(X, axis=0)
    assert_allclose(ax0_expected, ah0, err_msg="CPU, axis=0", rtol=rtol)
    ah1 = func(X, axis=1)
    assert_allclose(ax1_expected, ah1, err_msg="CPU, axis=1", rtol=rtol)
    if noax_expected is not None:
        ah = func(X)
        # the label used to read "axis=1", which misreported the failing case
        assert_allclose(noax_expected, ah, err_msg="CPU, no axis", rtol=rtol)

    # GPU, no target
    Xd = op.to_gpu(X)
    ad0 = func(Xd, axis=0)
    assert_allclose(ax0_expected, op.to_cpu(ad0), err_msg="GPU, axis=0", rtol=rtol)
    ad1 = func(Xd, axis=1)
    assert_allclose(ax1_expected, op.to_cpu(ad1), err_msg="GPU, axis=1", rtol=rtol)
    if noax_expected is not None:
        ad = func(Xd)
        assert_allclose(noax_expected, op.to_cpu(ad), err_msg="GPU, no axis", rtol=rtol)
def partial_fit(self, X, y, encode_labels=True):
    ''' Runs one epoch of minibatch-backprop on the given data.

    The data is walked in slices from ``generate_slices``; each slice gets
    a forward pass, a backward pass, one ``update`` per layer and a call
    to every minibatch callback. ``current_epoch`` is incremented at the
    end.

    Note: Input-Dropout might overwrite parts of X!
    Expects y in One-Hot format.

    Parameters
    ----------
    X: input data; a scipy CSR matrix, or an array with
        ``flags.c_contiguous`` set (asserted below).
    y: targets, sliced with the same slices as X.
    encode_labels: not used inside this method.

    Returns
    -------
    Sum of the minibatch losses divided by the number of minibatches.

    NOTE(review): with zero generated slices ``nbatches`` is 0 and the
    return statement raises ZeroDivisionError.
    '''
    if not sparse.isspmatrix_csr(X):
        assert(X.flags.c_contiguous)
    cur_lr, cur_momentum = self._get_current_learningrate(self.current_epoch)
    err = 0.0       # accumulated minibatch loss
    nbatches = 0
    for s in generate_slices(X.shape[0], self.batch_size, \
                             self.ignore_last_minibatch_if_smaller):
        Xtemp = X[s]
        ytemp = y[s]

        # CSR input on the host while the first layer's weights are a
        # GPUArray: move this minibatch to the GPU.
        # NOTE(review): an earlier comment described converting to dense
        # on the GPU; with todense() commented out the input stays sparse.
        if sparse.isspmatrix_csr(X) and isinstance(self.layers[0].W, op.gpuarray.GPUArray):
            a = op.cuda_memory_pool.allocate
            #Xtemp = op.to_gpu(Xtemp.A, stream=op.streams[0])
            #ytemp = op.to_gpu(ytemp, stream=op.streams[1])
            Xtemp = op.GPUCSRArray(Xtemp, allocator=a, stream=op.streams[0])
            #Xtemp = Xtemp.todense(allocator=a, stream=op.streams[0])
            # sparse targets are densified before the upload on stream 1
            if sparse.isspmatrix_csr(ytemp):
                ytemp = op.to_gpu(ytemp.toarray(), stream=op.streams[1])
            else:
                ytemp = op.to_gpu(ytemp, stream=op.streams[1])

        out = self.forward_pass(Xtemp)
        # presumably makes sure the targets uploaded on stream 1 are ready
        op.streams[1].synchronize()
        self.backward_pass(out, ytemp, cur_momentum)
        # NOTE(review): hard-coded stream 2; verify what is awaited here
        op.streams[2].synchronize()
        # one update per layer, streams assigned round-robin
        for i, l in enumerate(self.layers):
            l.update(cur_lr[i], stream=op.streams[i % len(op.streams)])
        self.update_count += 1
        batch_error = self._get_loss(ytemp, out)
        err += batch_error
        nbatches += 1
        for cb in self._minibatch_callbacks:
            cb(self, batch_error, Xtemp, ytemp)
    self.current_epoch += 1
    return err / nbatches
def test_togpu_dict():
    """to_gpu on a dict uploads the array value and keeps its contents."""
    payload = {'arr': np.random.randn(3, 5), 'str': "teststring"}
    # keep an untouched reference copy in case to_gpu works in place
    reference = copy.deepcopy(payload)

    on_gpu = op.to_gpu(payload)
    arr_dev = on_gpu['arr']
    assert type(arr_dev) == op.gpuarray.GPUArray
    assert arr_dev.shape == reference['arr'].shape

    back = op.to_cpu(on_gpu['arr'])
    assert_allclose(back, reference['arr'])
def test_togpu_list():
    """to_gpu on a list uploads the array element and keeps its contents."""
    payload = [np.random.randn(3, 5), "teststring"]
    # keep an untouched reference copy in case to_gpu works in place
    reference = copy.deepcopy(payload)

    on_gpu = op.to_gpu(payload)
    head_dev = on_gpu[0]
    assert type(head_dev) == op.gpuarray.GPUArray
    assert head_dev.shape == reference[0].shape

    back = op.to_cpu(on_gpu[0])
    assert_allclose(back, reference[0])
def tes_deactivate_t_gpusparseB_sgemm_ta_bug():
    """Dense^T-times-sparse add_dot on the GPU versus the scipy result.

    The name lacks the ``test_`` prefix, so prefix-based test discovery
    skips this function.
    """
    from scipy.sparse import csr_matrix

    lhs = np.random.normal(size=(6, 12)).astype(np.float32, order="c")
    rhs = np.random.laplace(size=(6, 33)).astype(np.float32)
    rhs[rhs < 0.1] = 0                  # drop small entries to get zeros
    rhs = csr_matrix(rhs, dtype=np.float32)
    acc = np.ones((12, 33), dtype=np.float32, order='c')

    reference = (lhs.T * rhs) + 0.5 * acc

    rhs_dev = GPUCSRArray(rhs)
    lhs_dev = op.to_gpu(lhs)
    acc_dev = op.to_gpu(acc)
    result = op.add_dot(lhs_dev, rhs_dev, acc_dev,
                        transA=True, alpha=1.0, beta=0.5)
    assert_allclose(result.get(), reference, rtol=1e-3,
                    err_msg="gpusparse_sgemmB ta bug")
def test_togpu_class():
    """to_gpu on a plain object must upload its ndarray attribute."""
    class Carrier:
        # minimal object with a single array attribute
        def __init__(self):
            self.X = np.random.randn(3, 5)

    uploaded = op.to_gpu(Carrier())
    attr = uploaded.X
    assert type(attr) == gpuarray.GPUArray, "type is %s" % type(attr)
    assert attr.shape == (3, 5)
def test_crossentropy():
    """Compare op.cross_entropy with a NumPy reference on CPU and GPU."""
    n_rows, n_cols = 100, 10
    X = np.random.rand(n_rows, n_cols).astype(np.float32)
    O = np.random.rand(n_rows, n_cols).astype(np.float32)
    # turn both matrices into row-wise distributions
    X /= X.sum(1)[:, None]
    O /= O.sum(1)[:, None]
    reference = -np.sum(X * np.log(O)) / X.shape[0]
    rtol = 1e-4

    cpu_loss = op.cross_entropy(X, O)
    assert_allclose(reference, cpu_loss, err_msg="CPU, no target", rtol=rtol)

    gpu_loss = op.cross_entropy(op.to_gpu(X), op.to_gpu(O))
    assert_allclose(reference, op.to_cpu(gpu_loss),
                    err_msg="GPU, no target", rtol=rtol)
def test_tonumpy_class():
    """An object sent to the GPU and back must hold an ndarray again."""
    class Carrier:
        # minimal object with a single array attribute
        def __init__(self):
            self.X = np.random.randn(3, 5)

    on_gpu = op.to_gpu(Carrier())
    restored = op.to_cpu(on_gpu)
    assert type(restored.X) == np.ndarray
    assert restored.X.shape == (3, 5)
def test_nan_in_toplayer_delta():
    """toplayer_delta must yield 0 wherever the target is NaN.

    Targets are drawn as 0/1 values and a random subset of them (mask
    ``M == 0``) is replaced by NaN. The expected delta is ``A - Y`` for
    the remaining entries and 0 for the NaN entries. Checked on CPU and
    GPU.
    """
    size = (200, 10)
    X = np.random.normal(size=size).astype(np.float32, order="c")
    A = op.sigmoid(X)
    Y = np.random.binomial(1.0, p=0.5, size=size).astype(np.float32)
    M = np.random.binomial(1.0, p=0.9, size=size).astype(np.float32)

    # The ``np.bool`` alias no longer exists in current NumPy; the builtin
    # ``bool`` is the equivalent dtype.
    missing = ~M.astype(bool)
    Y[missing] = np.nan
    Y_orig = Y.copy()   # pristine targets for the GPU run

    D = M * (A - Y)
    D[missing] = 0.0    # 0 * NaN is NaN, so zero those entries explicitly

    Y = op.toplayer_delta(A, Y, X)
    assert_allclose(Y, D)

    Yd = op.to_gpu(Y_orig)
    Ad = op.to_gpu(A)
    Xd = op.to_gpu(X)
    Yd = op.toplayer_delta(Ad, Yd, Xd)
    assert_allclose(Yd.get(), D)
def test_reorderrows():
    """op.reorder_rows(X, idx) must equal X[idx] on GPU and CPU."""
    n_rows = 1270
    host = 5 * np.random.randn(n_rows, 1000).astype(np.float32)
    order = list(range(host.shape[0]))
    np.random.shuffle(order)

    # GPU variant writes into a preallocated output buffer
    dev = op.to_gpu(host)
    dev_out = gpuarray.empty_like(dev)
    op.reorder_rows(dev, order, dev_out)
    assert_allclose(host[order], dev_out.get())

    # CPU variant returns the reordered array
    assert_allclose(host[order], op.reorder_rows(host, order))
def test_reorderrows():
    """Row reordering via op.reorder_rows must match fancy indexing."""
    n = 1270
    X = 5 * np.random.randn(n, 1000).astype(np.float32)
    permutation = list(range(X.shape[0]))
    np.random.shuffle(permutation)

    gpu_in = op.to_gpu(X)
    gpu_out = gpuarray.empty_like(gpu_in)
    op.reorder_rows(gpu_in, permutation, gpu_out)
    from_gpu = gpu_out.get()
    assert_allclose(X[permutation], from_gpu)

    from_cpu = op.reorder_rows(X, permutation)
    assert_allclose(X[permutation], from_cpu)
def test_tonumpy():
    """to_cpu restores an ndarray from the GPU and passes host data through."""
    host = np.random.randn(3, 5)
    restored = op.to_cpu(op.to_gpu(host))
    assert type(restored) == np.ndarray
    assert restored.shape == host.shape
    assert_allclose(restored, host)

    # data that already lives on the host must come back as the same object
    assert op.to_cpu(host) is host
def test_l1reg():
    """op.add_vec_l1reg must clip at zero instead of changing the sign."""
    # NOTE: you could argue whether it's okay to "jump over zero"
    # when applying both the regular gradient and the L1 gradient
    penalty = 0.005
    step = 1.0
    weights = np.array([3.0, 0.01, -0.01, 0.010, -0.010]).astype(np.float32)
    grads = np.array([2.9, 0.10, -0.10, 0.006, +0.006]).astype(np.float32)

    # reference: plain update, then clamp so a weight never crosses zero
    updated = weights + grads - penalty * np.sign(weights)
    want = np.where(weights > 0, np.maximum(0, updated), np.minimum(0, updated))

    out = np.empty_like(grads)
    op.add_vec_l1reg(weights, grads, step, penalty, out=out)
    assert_allclose(want, out)

    weights_dev = op.to_gpu(weights)
    grads_dev = op.to_gpu(grads)
    out_dev = op.to_gpu(np.empty_like(grads))
    op.add_vec_l1reg(weights_dev, grads_dev, step, penalty, out=out_dev)
    assert_allclose(want, op.to_cpu(out_dev))
def test_l1reg():
    """Compare op.add_vec_l1reg (CPU and GPU) with a NumPy reference."""
    # NOTE: you could argue whether it's okay to "jump over zero"
    # when applying both the regular gradient and the L1 gradient
    l1_penalty = 0.005
    eta = 1.0
    w = np.array([3.0, 0.01, -0.01, 0.010, -0.010]).astype(np.float32)
    dw = np.array([2.9, 0.10, -0.10, 0.006, +0.006]).astype(np.float32)

    raw = w + dw - l1_penalty * np.sign(w)
    positive = w > 0
    reference = np.where(positive, np.maximum(0, raw), np.minimum(0, raw))

    # CPU run
    cpu_out = np.empty_like(dw)
    op.add_vec_l1reg(w, dw, eta, l1_penalty, out=cpu_out)
    assert_allclose(reference, cpu_out)

    # GPU run
    gpu_w = op.to_gpu(w)
    gpu_dw = op.to_gpu(dw)
    gpu_out = op.to_gpu(np.empty_like(dw))
    op.add_vec_l1reg(gpu_w, gpu_dw, eta, l1_penalty, out=gpu_out)
    assert_allclose(reference, op.to_cpu(gpu_out))
def test_softmax():
    """op.softmax must match the row-wise reference and stay finite."""
    logits = np.random.randn(30, 50).astype(np.float32)
    exps = np.exp(logits)
    want = exps / np.sum(exps, axis=1).reshape(-1, 1)
    run_function(logits, want, op.softmax, rtol=1e-4)

    # very large inputs must not overflow into inf/nan
    huge = 10000 * np.random.randn(30, 50).astype(np.float32)
    host_result = op.softmax(huge)
    assert np.all(np.isfinite(host_result))
    dev_result = op.softmax(op.to_gpu(huge))
    assert np.all(np.isfinite(op.to_cpu(dev_result)))
def test_softmax():
    """Check op.softmax for correctness and for numerical stability."""
    shape = (30, 50)

    # correctness against exp(x) / row-sum(exp(x))
    X = np.random.randn(*shape).astype(np.float32)
    E = np.exp(X)
    row_sums = np.sum(E, axis=1).reshape(-1, 1)
    run_function(X, E / row_sums, op.softmax, rtol=1e-4)

    # stability: inputs scaled by 10000 must still give finite outputs
    X = 10000 * np.random.randn(*shape).astype(np.float32)
    on_cpu = op.softmax(X)
    assert np.all(np.isfinite(on_cpu))
    on_gpu = op.softmax(op.to_gpu(X))
    assert np.all(np.isfinite(op.to_cpu(on_gpu)))
def test_csrmm_bug():
    ''' Regression check without assertions: the 2nd call might crash.'''
    from scipy.sparse import csr_matrix

    weights = np.random.normal(size=(5, 3)).astype(np.float32, order="c")
    dense_x = np.random.laplace(size=(6, 3)).astype(np.float32)
    dense_x[dense_x < 0.1] = 0          # sparsify
    sparse_x = csr_matrix(dense_x, dtype=np.float32)

    x_dev = GPUCSRArray(sparse_x)
    w_dev = op.to_gpu(weights)
    main_stream = op.streams[0]

    c_dev = op.dot(x_dev, w_dev, False, True, out=None, stream=main_stream)
    op.add_dot(c_dev, x_dev, w_dev, True, False,
               alpha=-0.3, beta=1.0, stream=main_stream)
    op.mean(c_dev, axis=0, stream=op.streams[1])
def test_swaprows():
    """op.shuffle_rows must reorder X and y with the same index vector."""
    n_rows = 1270
    feats = 5.0 * np.random.randn(n_rows, 1000).astype(np.float32)
    # labels: three identical columns holding the row number
    row_ids = np.array(range(feats.shape[0]))[:, None]
    labels = np.hstack((row_ids, row_ids, row_ids)).astype(np.float32)

    order = np.array(list(range(feats.shape[0])), dtype=np.int32)
    np.random.shuffle(order)

    feats_dev = op.to_gpu(feats)
    labels_dev = op.to_gpu(labels)
    feats_out = gpuarray.empty_like(feats_dev)
    labels_out = gpuarray.empty_like(labels_dev)
    op.shuffle_rows(feats_dev, labels_dev, (feats_out, labels_out), order)

    assert_allclose(feats[order], op.to_cpu(feats_out))
    assert_allclose(labels[order], op.to_cpu(labels_out))
def test_swaprows():
    """Shuffling rows on the GPU must match fancy indexing on the host."""
    n = 1270
    X = 5.0 * np.random.randn(n, 1000).astype(np.float32)
    column = np.array(range(X.shape[0]))[:, None]
    y = np.hstack((column,) * 3).astype(np.float32)

    idx = list(range(X.shape[0]))
    idx = np.array(idx, dtype=np.int32)
    np.random.shuffle(idx)

    gpu_x = op.to_gpu(X)
    gpu_y = op.to_gpu(y)
    # output buffers are handed to shuffle_rows as a pair
    buffers = (gpuarray.empty_like(gpu_x), gpuarray.empty_like(gpu_y))
    op.shuffle_rows(gpu_x, gpu_y, buffers, idx)

    shuffled_x = op.to_cpu(buffers[0])
    shuffled_y = op.to_cpu(buffers[1])
    assert_allclose(X[idx], shuffled_x)
    assert_allclose(y[idx], shuffled_y)
def test_randomly_replace_elements():
    """Check replacement rate and mask rate of randomly_replace_elements."""
    values = (0.0, 0.5, 5)
    probabilities = (0.1, 0.2, 0.5, 0.75, 0.99)
    for val in values:
        for p in probabilities:
            X = np.random.normal(size=(1024, 2048)).astype(np.float32)
            # the GPU copy is taken before X is modified in place
            Xd = op.to_gpu(X)

            # CPU: the input itself is returned, plus a mask
            Xr, M = op.randomly_replace_elements(X, p, val)
            assert Xr is X
            replaced_share = (X == val).mean()
            assert_almost_equal(replaced_share, p, decimal=2,
                                err_msg="val: %.1f p: %.1f" % (val, p))
            assert_almost_equal(M.mean(), 1 - p, decimal=2,
                                err_msg="M val: %.1f p: %.1f" % (val, p))

            # GPU: same contract
            Xrd, Md = op.randomly_replace_elements(Xd, p, val)
            assert Xrd is Xd
            replaced_share = op.to_cpu(op.mean(Xd == val))
            assert_almost_equal(replaced_share, p, decimal=2,
                                err_msg="val: %.1f p: %.1f (gpu)" % (val, p))
            mask_share = op.to_cpu(op.mean(Md))
            assert_almost_equal(mask_share, 1 - p, decimal=2,
                                err_msg="M val: %.1f p: %.1f (gpu)" % (val, p))
def test_csrmm_bug():
    ''' Smoke test only (no assertions): the 2nd call might crash.'''
    from scipy.sparse import csr_matrix

    W = np.random.normal(size=(5, 3)).astype(np.float32, order="c")
    X = np.random.laplace(size=(6, 3)).astype(np.float32)
    X[X < 0.1] = 0                      # drop small entries to get zeros
    X = csr_matrix(X, dtype=np.float32)

    gpu_x = GPUCSRArray(X)
    gpu_w = op.to_gpu(W)

    # 1st call: sparse dot product on stream 0
    gpu_c = op.dot(gpu_x, gpu_w, False, True, out=None, stream=op.streams[0])
    # 2nd call: the one that might crash
    op.add_dot(gpu_c, gpu_x, gpu_w, True, False, alpha=-0.3, beta=1.0,
               stream=op.streams[0])
    op.mean(gpu_c, axis=0, stream=op.streams[1])
def test_gpucpu_bprop_equality():
    '''Backpropagation must give the same loss and gradients on CPU and GPU.'''
    host_net = NeuralNet([X.shape[1], 128, 32, y.shape[1]])
    dev_net = op.to_gpu(copy.deepcopy(host_net))

    host_out = host_net.forward_pass(X)
    host_loss = host_net.backward_pass(host_out, y)
    dev_out = dev_net.forward_pass(Xd)
    dev_loss = dev_net.backward_pass(dev_out, yd)

    assert_almost_equal(host_loss, dev_loss)
    assert_allclose(dev_out.get(), host_out, rtol=1e-5, err_msg="frop error")

    # walk the layers from the top down and compare the gradients
    n_layers = len(host_net.layersizes) - 1
    for i in range(n_layers - 1, -1, -1):
        host_layer = host_net.layers[i]
        dev_layer = dev_net.layers[i]
        host_dw = host_layer.dW
        host_db = host_layer.db
        dev_dw = dev_layer.dW.get()
        dev_db = dev_layer.db.get()
        assert_allclose(host_dw, dev_dw, atol=1e-5,
                        err_msg="dW diff in layer %d" % i)
        assert_allclose(host_db, dev_db, atol=1e-5,
                        err_msg="db diff in layer %d" % i)
def test_gpucpu_bprop_equality():
    '''Compare loss, output and per-layer gradients between CPU and GPU.'''
    sizes = [X.shape[1], 128, 32, y.shape[1]]
    neth = NeuralNet(sizes)
    netd = op.to_gpu(copy.deepcopy(neth))

    # CPU pass
    cpu_out = neth.forward_pass(X)
    cpu_loss = neth.backward_pass(cpu_out, y)
    # GPU pass
    gpu_out = netd.forward_pass(Xd)
    gpu_loss = netd.backward_pass(gpu_out, yd)

    assert_almost_equal(cpu_loss, gpu_loss)
    assert_allclose(gpu_out.get(), cpu_out, rtol=1e-5, err_msg="frop error")

    layer_ids = reversed(range(len(neth.layersizes) - 1))
    for i in layer_ids:
        cpu_layer, gpu_layer = neth.layers[i], netd.layers[i]
        cpu_dw = cpu_layer.dW
        cpu_db = cpu_layer.db
        gpu_dw = gpu_layer.dW.get()
        gpu_db = gpu_layer.db.get()
        assert_allclose(cpu_dw, gpu_dw, atol=1e-5,
                        err_msg="dW diff in layer %d" % i)
        assert_allclose(cpu_db, gpu_db, atol=1e-5,
                        err_msg="db diff in layer %d" % i)
def test_rand_gaussian():
    """Check mean and standard deviation of op.rand_gaussian_like.

    Samples are drawn on the CPU and on the GPU, once with the default
    parameters (compared against mean 0 and std 1) and once with
    ``mu=5.0, sigma=2.0``.

    The moment checks are two-sided: a sample mean or std far below the
    target fails as well, which the previous one-sided comparisons could
    not detect.
    """
    def _check_moments(sample, mu, sigma, tol):
        # Two-sided check of the empirical mean and std of ``sample``.
        # ``tol`` is the absolute bound, but never tighter than five
        # standard errors of the respective estimator; otherwise a correct
        # generator would fail purely by chance.
        n = sample.size
        mean_tol = max(tol, 5.0 * sigma / np.sqrt(n))
        std_tol = max(tol, 5.0 * sigma / np.sqrt(2.0 * n))
        mean = sample.mean()
        std = sample.std()
        assert abs(mean - mu) < mean_tol, "mean: %f" % mean
        assert abs(std - sigma) < std_tol, "std: %f" % std

    X = np.empty((4000, 1000), dtype=np.float32)

    # CPU
    cpu_tol = 1e-3
    _check_moments(op.rand_gaussian_like(X), 0.0, 1.0, cpu_tol)
    _check_moments(op.rand_gaussian_like(X, mu=5.0, sigma=2.0),
                   5.0, 2.0, cpu_tol)

    # GPU (looser bound, as before)
    gpu_tol = 1e-2
    Xd = op.to_gpu(X)
    _check_moments(op.to_cpu(op.rand_gaussian_like(Xd)), 0.0, 1.0, gpu_tol)
    _check_moments(op.to_cpu(op.rand_gaussian_like(Xd, mu=5.0, sigma=2.0)),
                   5.0, 2.0, gpu_tol)
def test_rand():
    """Check op.rand_uniform_like on CPU and GPU, with and without ``out``.

    Every sample must have a mean close to 0.5, lie inside [0, 1] and come
    within ``rtol`` of both interval ends. When ``out`` is given, the very
    same buffer must be returned.
    """
    rtol = 1e-3

    def _check_uniform(Y):
        # Shared assertions for one sample.
        # The mean is checked two-sided, and the upper end is a real
        # comparison: the former ``assert Y.max() - 1.0 - rtol`` only
        # tested a non-zero float for truthiness and so always passed.
        assert abs(Y.mean() - 0.5) < rtol, "mean: %f" % Y.mean()
        assert Y.min() >= 0.0, "min: %f" % Y.min()
        assert Y.min() - 0.0 < rtol, "min: %f" % Y.min()
        assert Y.max() <= 1.0, "max: %f" % Y.max()
        assert 1.0 - Y.max() < rtol, "max: %f" % Y.max()

    X = np.empty((1000, 1000), dtype=np.float32)

    # CPU, freshly allocated result
    _check_uniform(op.rand_uniform_like(X))

    # CPU, caller-provided output buffer
    Y = np.empty_like(X)
    out = op.rand_uniform_like(X, out=Y)
    assert out is Y
    _check_uniform(Y)

    # GPU, freshly allocated result
    Xd = op.to_gpu(X)
    Yd = gpuarray.empty_like(Xd)
    _check_uniform(op.to_cpu(op.rand_uniform_like(Xd)))

    # GPU, caller-provided output buffer
    out = op.rand_uniform_like(Xd, out=Yd)
    assert out is Yd
    _check_uniform(op.to_cpu(Yd))