Example #1
def train_ensemble(prototype_net, dataset, outfile=None, n_nets=10, use_gpu=True):
    ''' Trains a given number of networks on a given dataset.

    All networks will be clones of the given prototype, and they will all
    be pickled into the given outfile.'''
    from binet import op
    if use_gpu:
        gc.collect()
        if not op._IS_CUDA_INITIALIZED:
            logger = logging.getLogger(__name__)
            logger.warning("CUDA not initialized, initializing GPU 0")
            op.init_gpu(0)

        X, y, Xvalid, yvalid = [op.to_gpu(d) for d in dataset]
        prototype_net = op.to_gpu(prototype_net)
    else:
        X, y, Xvalid, yvalid = dataset
    if outfile is not None:
        f = open(outfile, "wb")
    nets = []
    try:
        for i in range(n_nets):
            prototype_net.reset()
            if use_gpu:
                prototype_net = op.to_gpu(prototype_net)
            prototype_net.fit(X, y, Xvalid, yvalid)
            prototype_net = op.to_cpu(prototype_net)
            nets.append(copy.deepcopy(prototype_net))
            if outfile is not None:
                pickle.dump(prototype_net, f, -1)
    finally:
        if outfile is not None:
            f.close()
    return nets
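
Since train_ensemble appends each trained network to the same pickle file, the ensemble can be read back by unpickling records until the file is exhausted. A minimal sketch of such a loader (`load_ensemble` is a hypothetical helper, not part of the library):

import pickle

def load_ensemble(outfile):
    '''Reads back all networks that train_ensemble dumped into outfile.'''
    nets = []
    with open(outfile, "rb") as f:
        while True:
            try:
                nets.append(pickle.load(f))  # one pickle record per trained net
            except EOFError:  # no more records left in the file
                break
    return nets
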
Example #2
def run_function(X, Y_expected, func, rtol=1e-6, with_inplace_test=True, **kwargs):
    # CPU, with target argument
    Y = np.empty_like(Y_expected)
    Yhr = func(X, out=Y, **kwargs)
    assert_allclose(Y_expected, Yhr, err_msg="CPU with target", rtol=rtol)
    assert Yhr is Y

    # CPU, no target argument
    Yhr = func(X, **kwargs)
    assert_allclose(Y_expected, Yhr, err_msg="CPU, no target", rtol=rtol)

    if with_inplace_test:
        X2 = X.copy()
        Yhr = func(X2, out=X2, **kwargs)
        assert_allclose(Y_expected, Yhr, err_msg="CPU, inplace target", rtol=rtol)
        assert Yhr is X2

    kwargs = op.to_gpu(kwargs)

    # GPU, with target
    Xd = op.to_gpu(X)
    Yd = gpuarray.empty_like(op.to_gpu(Y_expected))
    Ydr = func(Xd, out=Yd, **kwargs)
    assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU with target", rtol=rtol)
    assert Ydr is Yd

    # GPU, no target
    Ydr = func(Xd, **kwargs)
    assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU, no target", rtol=rtol)

    if with_inplace_test:
        Ydr = func(Xd, out=Xd, **kwargs)
        assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU, inplace target", rtol=rtol)
        assert Ydr is Xd
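
For illustration, a concrete test might drive this harness as follows, checking `op.sigmoid` (used elsewhere in these tests) against a NumPy reference. This is a sketch that assumes op.sigmoid accepts the same `out=` keyword as the other element-wise ops exercised here:

def test_sigmoid_via_harness():
    X = np.random.randn(30, 50).astype(np.float32)
    Y_expected = 1.0 / (1.0 + np.exp(-X))  # NumPy reference for the sigmoid
    run_function(X, Y_expected, op.sigmoid, rtol=1e-4)
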
Example #3
def test_add_vec():
    x = 5.0 * np.random.randn(10).astype(np.float32)
    y = 10.0 * np.random.randn(10).astype(np.float32)
    x_orig = x.copy()
    alpha = 2.5
    z = x + alpha * y
    rtol = 1e-4

    op.add_vec(x, alpha, y)
    assert_allclose(z, x, err_msg="CPU", rtol=rtol)

    xd = op.to_gpu(x_orig)
    yd = op.to_gpu(y)
    op.add_vec(xd, alpha, yd)
    res = op.to_cpu(xd)
    assert_allclose(z, res, err_msg="GPU", rtol=rtol)

    x = x_orig.copy()
    alpha = 2.5
    beta = 0.5
    z = beta * x + alpha * y
    rtol = 1e-4

    op.add_vec(x, alpha, y, beta)
    assert_allclose(z, x, err_msg="CPU", rtol=rtol)

    xd = op.to_gpu(x_orig)
    yd = op.to_gpu(y)
    op.add_vec(xd, alpha, yd, beta)
    res = op.to_cpu(xd)
    assert_allclose(z, res, err_msg="GPU", rtol=rtol)
Example #4
def test_togpu():
    X = np.random.randn(3, 5)
    Xd = op.to_gpu(X)
    assert type(Xd) == gpuarray.GPUArray
    assert Xd.shape == X.shape

    Xd2 = op.to_gpu(Xd)
    assert Xd2 is Xd
Example #5
def train(net, dataset, fname=None, skip_output=25,
          show_plots=False, use_gpu=True, **kwargs):
    ''' Trains a neural network on the given dataset.

    If desired, the log-statements during training can be buffered into a
    StringIO object. This has the drawback that the output is only visible
    once the net has been fully trained, but it allows printing only every
    n-th message.

    Parameters
    ----------
    net: the neural net.
    dataset: tuple containing 'trainx', 'trainy', 'validx', 'validy'
    fname: file-name in which to store the (pickled) network after training.
           The file will be stored in the 'data' subfolder of the CWD.
    skip_output: how many lines of output to skip between two lines that
                 will actually be printed.
    show_plots: If True, plot the first 256 weights of the lowest layer.
    use_gpu: if True, use gnumpy to run the code on the GPU.
    **kwargs: additional parameters for the `plot_images` call when
              `show_plots=True`.
    '''
    from binet import op
    if use_gpu:
        gc.collect()
        if not op._IS_CUDA_INITIALIZED:
            logger = logging.getLogger(__name__)
            logger.warning("CUDA not initialized, initializing GPU 0")
            op.init_gpu(0)

        X, y, Xvalid, yvalid = [op.to_gpu(d) for d in dataset]
        net = op.to_gpu(net)
    else:
        X, y, Xvalid, yvalid = dataset
    try:
        init_out = net.transform(X)
        init_err = net._get_loss(y, init_out)
        net.track_progress(time.time(), init_err, X, y, Xvalid, yvalid)
        net.fit(X, y, Xvalid, yvalid, skip_output=skip_output)
        #if net.verbose and net.current_epoch % skip_output != 0: # make sure we show the last line
        #    net.track_progress(time.time(), -1, X, y, Xvalid, yvalid)
    except KeyboardInterrupt:
        print("Intercepted KeyboardInterrupt, stopping... current status:")
        net.track_progress(time.time(), -1, X, y, Xvalid, yvalid)
        net.statistics = net.statistics[:-1] # we just added an invalid point
    finally:
        net = op.to_cpu(net)
        if fname:
            if not os.path.exists("data"):
                warnings.warn("creating 'data' directory to store pickled net")
                os.mkdir("data")
            with open(os.path.join("data", fname), "wb") as f:
                pickle.dump(net, f, -1)
        if show_plots:
            plot_images(net.weights[0], 16, 16, **kwargs)
            plot_learning_curves(net, **kwargs)
    return net
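
A hypothetical call, assuming a NeuralNet class like the one used in the CPU/GPU equality tests below and a dataset tuple ordered as the docstring describes:

net = NeuralNet([784, 256, 10])
dataset = (trainx, trainy, validx, validy)  # hypothetical, pre-loaded arrays
net = train(net, dataset, fname="net.pkl", skip_output=10, use_gpu=True)
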
Example #6
def test_dsigmoid_delta():
    X = np.random.randn(3, 5).astype(np.float32)
    A = 5 * np.random.randn(30, 50).astype(np.float32)
    D = 5 * np.random.randn(30, 50).astype(np.float32)
    D_expected = D * A * (1 - A)
    Dd = op.to_gpu(D)
    op.dsigmoid_delta(D, A, X)  # modifies D in place
    assert_allclose(D_expected, D, rtol=1e-5, err_msg="CPU")
    Ad = op.to_gpu(A)
    Xd = op.to_gpu(X)
    op.dsigmoid_delta(Dd, Ad, Xd)
    assert_allclose(D_expected, op.to_cpu(Dd), rtol=1e-5, err_msg="GPU")
Example #7
def test_toplayer_delta():
    X = np.random.randn(3, 5).astype(np.float32)
    A = 5 * np.random.randn(30, 50).astype(np.float32)
    D = 5 * np.random.randn(30, 50).astype(np.float32)
    D_expected = A - D
    Dd = op.to_gpu(D)
    Yh = op.toplayer_delta(A, D, X)
    assert_allclose(D_expected, Yh, rtol=1e-5, err_msg="CPU")
    Ad = op.to_gpu(A)
    Xd = op.to_gpu(X)
    Yhd = op.toplayer_delta(Ad, Dd, Xd)
    assert_allclose(D_expected, op.to_cpu(Yhd), rtol=1e-5, err_msg="GPU")
Example #8
    def partial_fit(self, X, y, encode_labels=True):
        ''' Runs one epoch of minibatch-backprop on the given data.

        Note: Input-Dropout might overwrite parts of X!

        Expects y in One-Hot format'''
        if not sparse.isspmatrix_csr(X):
            assert (X.flags.c_contiguous)

        cur_lr, cur_momentum = self._get_current_learningrate(
            self.current_epoch)
        err = 0.0
        nbatches = 0
        for s in generate_slices(X.shape[0], self.batch_size,
                                 self.ignore_last_minibatch_if_smaller):
            Xtemp = X[s]
            ytemp = y[s]

            # for sparse matrices, the fastest option is to convert to
            # dense on the GPU and then operate in dense
            if sparse.isspmatrix_csr(X) and isinstance(self.layers[0].W,
                                                       op.gpuarray.GPUArray):
                a = op.cuda_memory_pool.allocate
                #Xtemp = op.to_gpu(Xtemp.A, stream=op.streams[0])
                #ytemp = op.to_gpu(ytemp, stream=op.streams[1])
                Xtemp = op.GPUCSRArray(Xtemp,
                                       allocator=a,
                                       stream=op.streams[0])
                #Xtemp = Xtemp.todense(allocator=a, stream=op.streams[0])
                if sparse.isspmatrix_csr(ytemp):
                    ytemp = op.to_gpu(ytemp.toarray(), stream=op.streams[1])
                else:
                    ytemp = op.to_gpu(ytemp, stream=op.streams[1])

            out = self.forward_pass(Xtemp)
            op.streams[1].synchronize()
            self.backward_pass(out, ytemp, cur_momentum)
            op.streams[2].synchronize()
            for i, l in enumerate(self.layers):
                l.update(cur_lr[i], stream=op.streams[i % len(op.streams)])
            self.update_count += 1

            batch_error = self._get_loss(ytemp, out)
            err += batch_error
            nbatches += 1

            for cb in self._minibatch_callbacks:
                cb(self, batch_error, Xtemp, ytemp)

        self.current_epoch += 1
        return err / nbatches
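
The generate_slices helper is not shown in this snippet; a minimal sketch of what it presumably does, yielding consecutive minibatch slices over n samples and optionally dropping a smaller trailing batch:

def generate_slices(n, batch_size, ignore_last_minibatch_if_smaller=False):
    start = 0
    while start < n:
        end = min(start + batch_size, n)
        if ignore_last_minibatch_if_smaller and end - start < batch_size:
            break  # drop the final, smaller minibatch
        yield slice(start, end)
        start = end
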
Example #9
def test_gpusparseA_sgemm():
    from scipy.sparse import csr_matrix
    A = np.random.laplace(size=(5, 3)).astype(np.float32)
    A[A < 0.1] = 0
    A = csr_matrix(A, dtype=np.float32)
    B = np.random.normal(size=(3, 6)).astype(np.float32, order="c")
    C = np.ones((A.shape[0], B.shape[1]), dtype=np.float32, order='c')

    X_exp = (A * B) + 0.5 * C
    Ad = GPUCSRArray(A)
    Bd = op.to_gpu(B)
    Cd = op.to_gpu(C)
    Xd = op.add_dot(Ad, Bd, Cd, alpha=1.0, beta=0.5)
    assert_allclose(Xd.get(), X_exp, rtol=1e-4, err_msg="gpusparse_sgemm")
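
For reference, the dense NumPy equivalent of the add_dot call above, assuming the usual GEMM convention that the output is alpha times the product plus beta times the existing buffer:

C_ref = 1.0 * (A.toarray() @ B) + 0.5 * C  # matches X_exp up to float32 rounding
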
Example #10
def test_add_matvec():
    X = np.random.randn(3, 4).astype(np.float32)
    b1 = np.random.randn(4, 1).astype(np.float32)
    b2 = np.random.randn(3, 1).astype(np.float32)
    Y_expected1 = X + b1.T
    Y_expected2 = X + b2
    assert_allclose(Y_expected1, op.add_matvec(X, b1, 1))
    assert_allclose(Y_expected2, op.add_matvec(X, b2, 0))

    Xd = op.to_gpu(X)
    b1d = op.to_gpu(b1)
    b2d = op.to_gpu(b2)
    assert_allclose(Y_expected1, op.to_cpu(op.add_matvec(Xd, b1d, 1)))
    assert_allclose(Y_expected2, op.to_cpu(op.add_matvec(Xd, b2d, 0)))
Example #11
def test_gpusparseB_sgemm_tb():
    from scipy.sparse import csr_matrix
    B = np.random.laplace(size=(3, 5)).astype(np.float32)
    B[B < 0.1] = 0
    B = csr_matrix(B, dtype=np.float32)
    A = np.random.normal(size=(4, 5)).astype(np.float32, order="c")
    C = np.ones((A.shape[0], B.shape[0]), dtype=np.float32, order='c')
    X_exp = (A * B.T) + 0.5 * C

    Bd = GPUCSRArray(B)
    Ad = op.to_gpu(A)
    Cd = op.to_gpu(C)
    Xd = op.add_dot(Ad, Bd, Cd, transB=True, alpha=1.0, beta=0.5)
    assert_allclose(Xd.get(), X_exp, rtol=1e-4, err_msg="gpusparse_sgemmB tb")
Example #12
def test_crossentropy():
    X = np.random.rand(100, 10).astype(np.float32)
    O = np.random.rand(100, 10).astype(np.float32)
    X /= X.sum(1)[:, None]
    O /= O.sum(1)[:, None]
    Y_expected = -np.sum(X * np.log(O)) / X.shape[0]
    rtol = 1e-4
    Yhr = op.cross_entropy(X, O)
    assert_allclose(Y_expected, Yhr, err_msg="CPU, no target", rtol=rtol)

    Xd = op.to_gpu(X)
    Od = op.to_gpu(O)
    Yd = op.cross_entropy(Xd, Od)
    assert_allclose(Y_expected, op.to_cpu(Yd), err_msg="GPU, no target", rtol=rtol)
Example #13
def test_gpucpu_fprop_equality():
    '''Test forward propagation CPU/GPU equality.'''
    neth = NeuralNet([X.shape[1], 128, 32, y.shape[1]])
    netd = op.to_gpu(copy.deepcopy(neth))
    outh = neth.forward_pass(X)
    outd = netd.forward_pass(Xd)
    assert_allclose(outd.get(), outh, rtol=1e-5, err_msg="fprop error")
Example #14
def test_tognumpy_list():
    X = [np.random.randn(3, 5), "teststring"]
    Xd = op.to_gpu(X)
    Xh = op.to_cpu(Xd)
    assert type(Xh[0]) == np.ndarray
    assert Xh[0].shape == X[0].shape
    assert_array_equal(Xh[0], X[0])
Example #15
def test_randomly_replace_elements():
    for val in (0.0, 0.5, 5):
        for p in (0.1, 0.2, 0.5, 0.75, 0.99):
            X = np.random.normal(size=(1024, 2048)).astype(np.float32)
            Xd = op.to_gpu(X)
            Xr, M = op.randomly_replace_elements(X, p, val)
            assert (Xr is X)
            assert_almost_equal((X == val).mean(),
                                p,
                                decimal=2,
                                err_msg="val: %.1f p: %.1f" % (val, p))
            assert_almost_equal(M.mean(),
                                1 - p,
                                decimal=2,
                                err_msg="M val: %.1f p: %.1f" % (val, p))

            Xrd, Md = op.randomly_replace_elements(Xd, p, val)
            assert (Xrd is Xd)
            assert_almost_equal(op.to_cpu(op.mean(Xd == val)),
                                p,
                                decimal=2,
                                err_msg="val: %.1f p: %.1f (gpu)" % (val, p))
            assert_almost_equal(op.to_cpu(op.mean(Md)),
                                1 - p,
                                decimal=2,
                                err_msg="M val: %.1f p: %.1f (gpu)" % (val, p))
Example #16
def run_function_with_axis(X,
                           ax0_expected,
                           ax1_expected,
                           noax_expected,
                           func,
                           rtol=1e-6):
    # CPU, no target argument
    ah0 = func(X, axis=0)
    assert_allclose(ax0_expected, ah0, err_msg="CPU, axis=0", rtol=rtol)
    ah1 = func(X, axis=1)
    assert_allclose(ax1_expected, ah1, err_msg="CPU, axis=1", rtol=rtol)
    if noax_expected is not None:
        ah = func(X)
        assert_allclose(noax_expected, ah, err_msg="CPU, no axis", rtol=rtol)

    Xd = op.to_gpu(X)
    # GPU, no target
    ad0 = func(Xd, axis=0)
    assert_allclose(ax0_expected,
                    op.to_cpu(ad0),
                    err_msg="GPU, axis=0",
                    rtol=rtol)
    ad1 = func(Xd, axis=1)
    assert_allclose(ax1_expected,
                    op.to_cpu(ad1),
                    err_msg="GPU, axis=1",
                    rtol=rtol)
    if noax_expected is not None:
        ad = func(Xd)
        assert_allclose(noax_expected,
                        op.to_cpu(ad),
                        err_msg="GPU, no axis",
                        rtol=rtol)
Example #17
def test_togpu_dict():
    X = {'arr': np.random.randn(3, 5), 'str': "teststring"}
    X_orig = copy.deepcopy(X)
    Xd = op.to_gpu(X)
    assert type(Xd['arr']) == op.gpuarray.GPUArray
    assert Xd['arr'].shape == X_orig['arr'].shape
    Xh = op.to_cpu(Xd['arr'])
    assert_allclose(Xh, X_orig['arr'])
Example #18
def test_togpu_list():
    X = [np.random.randn(3, 5), "teststring"]
    X_orig = copy.deepcopy(X)
    Xd = op.to_gpu(X)
    assert type(Xd[0]) == op.gpuarray.GPUArray
    assert Xd[0].shape == X_orig[0].shape
    Xh = op.to_cpu(Xd[0])
    assert_allclose(Xh, X_orig[0])
Example #19
def tes_deactivate_t_gpusparseB_sgemm_ta_bug():  # deliberately misnamed, presumably so pytest skips this known-bug case
    from scipy.sparse import csr_matrix
    A = np.random.normal(size=(6, 12)).astype(np.float32, order="c")
    B = np.random.laplace(size=(6, 33)).astype(np.float32)
    B[B < 0.1] = 0
    B = csr_matrix(B, dtype=np.float32)
    C = np.ones((12, 33), dtype=np.float32, order='c')
    X_exp = (A.T * B) + 0.5 * C

    Bd = GPUCSRArray(B)
    Ad = op.to_gpu(A)
    Cd = op.to_gpu(C)
    Xd = op.add_dot(Ad, Bd, Cd, transA=True, alpha=1.0, beta=0.5)
    assert_allclose(Xd.get(),
                    X_exp,
                    rtol=1e-3,
                    err_msg="gpusparse_sgemmB ta bug")
Example #20
def test_togpu_class():
    class MyTest:
        def __init__(self):
            self.X = np.random.randn(3, 5)
    t = MyTest()
    Td = op.to_gpu(t)
    assert type(Td.X) == gpuarray.GPUArray, "type is %s" % type(Td.X)
    assert Td.X.shape == (3, 5)
Beispiel #35
0
def test_crossentropy():
    X = np.random.rand(100, 10).astype(np.float32)
    O = np.random.rand(100, 10).astype(np.float32)
    X /= X.sum(1)[:, None]
    O /= O.sum(1)[:, None]
    Y_expected = -np.sum(X * np.log(O)) / X.shape[0]
    rtol = 1e-4
    Y = np.empty_like(X)
    Yhr = op.cross_entropy(X, O)
    assert_allclose(Y_expected, Yhr, err_msg="CPU, no target", rtol=rtol)

    Xd = op.to_gpu(X)
    Od = op.to_gpu(O)
    Yd = op.cross_entropy(Xd, Od)
    assert_allclose(Y_expected,
                    op.to_cpu(Yd),
                    err_msg="GPU, no target",
                    rtol=rtol)
Example #21
def test_tonumpy_class():
    class MyTest:
        def __init__(self):
            self.X = np.random.randn(3, 5)
    t = MyTest()
    Td = op.to_gpu(t)
    Th = op.to_cpu(Td)
    assert type(Th.X) == np.ndarray
    assert Th.X.shape == (3, 5)
Example #22
def test_nan_in_toplayer_delta():
    size = (200, 10)
    X = np.random.normal(size=size).astype(np.float32, order="c")
    A = op.sigmoid(X)
    Y = np.random.binomial(1, p=0.5, size=size).astype(np.float32)
    M = np.random.binomial(1, p=0.9, size=size).astype(np.float32)
    Y[~M.astype(bool)] = np.nan
    Y_orig = Y.copy()
    D = M * (A - Y)
    D[~M.astype(bool)] = 0.0

    Y = op.toplayer_delta(A, Y, X)
    assert_allclose(Y, D)

    Yd = op.to_gpu(Y_orig)
    Ad = op.to_gpu(A)
    Xd = op.to_gpu(X)
    Yd = op.toplayer_delta(Ad, Yd, Xd)
    assert_allclose(Yd.get(), D)
Example #23
def test_reorderrows():
    n = 1270
    X = 5 * np.random.randn(n, 1000).astype(np.float32)
    idx = list(range(X.shape[0]))
    np.random.shuffle(idx)
    Xd = op.to_gpu(X)
    Xoutd = gpuarray.empty_like(Xd)
    op.reorder_rows(Xd, idx, Xoutd)
    assert_allclose(X[idx], Xoutd.get())
    assert_allclose(X[idx], op.reorder_rows(X, idx))
Example #24
def test_tonumpy():
    X = np.random.randn(3, 5)
    Xd = op.to_gpu(X)
    Xh = op.to_cpu(Xd)
    assert type(Xh) == np.ndarray
    assert Xh.shape == X.shape
    assert_allclose(Xh, X)

    X2 = op.to_cpu(X)
    assert X2 is X
Example #25
def test_l1reg():

    # NOTE: you could argue whether it's okay to "jump over zero"
    #       when applying both the regular gradient and the L1 gradient
    l1_penalty = 0.005
    w = np.array([3.0, 0.01, -0.01, 0.010, -0.010]).astype(np.float32)
    dw = np.array([2.9, 0.10, -0.10, 0.006, +0.006]).astype(np.float32)
    eta = 1.0

    nw = w + dw - l1_penalty * np.sign(w)
    expected = np.where(w > 0, np.maximum(0, nw), np.minimum(0, nw))
    y = np.empty_like(dw)
    op.add_vec_l1reg(w, dw, eta, l1_penalty, out=y)
    assert_allclose(expected, y)

    wd = op.to_gpu(w)
    dwd = op.to_gpu(dw)

    yd = op.to_gpu(np.empty_like(dw))
    op.add_vec_l1reg(wd, dwd, eta, l1_penalty, out=yd)
    assert_allclose(expected, op.to_cpu(yd))
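
The expected value above encodes a clipped L1 update: take the gradient step plus the L1 shrinkage term, then clamp any weight that would cross zero to exactly zero. A NumPy sketch of what op.add_vec_l1reg is expected to compute (eta is assumed to scale both terms; the test only exercises eta=1.0, so the two cases cannot be distinguished here):

def add_vec_l1reg_ref(w, dw, eta, l1_penalty, out):
    nw = w + eta * (dw - l1_penalty * np.sign(w))  # step plus L1 shrinkage
    # weights that would jump over zero are clamped to exactly zero
    out[:] = np.where(w > 0, np.maximum(0, nw), np.minimum(0, nw))
    return out
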
Example #26
def test_softmax():
    X = np.random.randn(30, 50).astype(np.float32)
    E = np.exp(X)
    Y_expected = E / np.sum(E, axis=1).reshape(-1, 1)
    run_function(X, Y_expected, op.softmax, rtol=1e-4)

    X = 10000 * np.random.randn(30, 50).astype(np.float32)
    Y = op.softmax(X)
    assert np.all(np.isfinite(Y))

    Y = op.softmax(op.to_gpu(X))
    assert np.all(np.isfinite(op.to_cpu(Y)))
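
The second half of this test feeds inputs on the order of plus/minus 10000, where a naive exp overflows float32; the standard remedy, which op.softmax evidently applies, is to subtract the row maximum before exponentiating. A NumPy sketch:

def softmax_ref(X):
    E = np.exp(X - X.max(axis=1, keepdims=True))  # shift rows so exp cannot overflow
    return E / E.sum(axis=1, keepdims=True)
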
Example #27
def test_csrmm_bug():
    ''' the 2nd call might crash'''
    from scipy.sparse import csr_matrix
    W = np.random.normal(size=(5, 3)).astype(np.float32, order="c")
    X = np.random.laplace(size=(6, 3)).astype(np.float32)
    X[X < 0.1] = 0
    X = csr_matrix(X, dtype=np.float32)

    Xd = GPUCSRArray(X)
    Wd = op.to_gpu(W)
    Cd = op.dot(Xd, Wd, False, True, out=None, stream=op.streams[0])
    op.add_dot(Cd, Xd, Wd, True, False, alpha=-0.3, beta=1.0, stream=op.streams[0])
    op.mean(Cd, axis=0, stream=op.streams[1])
Example #28
def test_swaprows():
    n = 1270

    X = 5.0 * np.random.randn(n, 1000).astype(np.float32)
    ytemp = np.array(range(X.shape[0]))[:, None]
    y = np.hstack((ytemp, ytemp, ytemp)).astype(np.float32)

    idx = np.arange(X.shape[0], dtype=np.int32)
    np.random.shuffle(idx)

    Xd = op.to_gpu(X)
    yd = op.to_gpu(y)
    Xoutd = gpuarray.empty_like(Xd)
    youtd = gpuarray.empty_like(yd)
    op.shuffle_rows(Xd, yd, (Xoutd, youtd), idx)

    X2 = op.to_cpu(Xoutd)
    y2 = op.to_cpu(youtd)

    assert_allclose(X[idx], X2)
    assert_allclose(y[idx], y2)
Example #29
def test_gpucpu_bprop_equality():
    '''Test backpropagation CPU/GPU equality.'''

    neth = NeuralNet([X.shape[1], 128, 32, y.shape[1]])
    netd = op.to_gpu(copy.deepcopy(neth))

    outh = neth.forward_pass(X)
    lh = neth.backward_pass(outh, y)

    outd = netd.forward_pass(Xd)
    ld = netd.backward_pass(outd, yd)

    assert_almost_equal(lh, ld)
    assert_allclose(outd.get(), outh, rtol=1e-5, err_msg="fprop error")
    for i in reversed(range(len(neth.layersizes) - 1)):
        dwh = neth.layers[i].dW
        dbh = neth.layers[i].db
        dwd = netd.layers[i].dW.get()
        dbd = netd.layers[i].db.get()
        assert_allclose(dwh, dwd, atol=1e-5, err_msg="dW diff in layer %d" % i)
        assert_allclose(dbh, dbd, atol=1e-5, err_msg="db diff in layer %d" % i)
Example #30
def test_rand_gaussian():
    X = np.empty((4000, 1000), dtype=np.float32)
    Y = op.rand_gaussian_like(X)
    rtol = 1e-3
    assert abs(Y.mean() - 0.0) < rtol, "mean: %f" % Y.mean()
    assert abs(Y.std() - 1.0) < rtol, "std: %f" % Y.std()

    Y = op.rand_gaussian_like(X, mu=5.0, sigma=2.0)
    rtol = 1e-3
    assert abs(Y.mean() - 5.0) < rtol, "mean: %f" % Y.mean()
    assert abs(Y.std() - 2.0) < rtol, "std: %f" % Y.std()

    Xd = op.to_gpu(X)
    Y = op.to_cpu(op.rand_gaussian_like(Xd))
    rtol = 1e-2
    assert abs(Y.mean() - 0.0) < rtol, "mean: %f" % Y.mean()
    assert abs(Y.std() - 1.0) < rtol, "std: %f" % Y.std()

    Y = op.to_cpu(op.rand_gaussian_like(Xd, mu=5.0, sigma=2.0))
    rtol = 1e-2
    assert abs(Y.mean() - 5.0) < rtol, "mean: %f" % Y.mean()
    assert abs(Y.std() - 2.0) < rtol, "std: %f" % Y.std()
Example #31
def test_rand():
    X = np.empty((1000, 1000), dtype=np.float32)
    Y = op.rand_uniform_like(X)
    rtol = 1e-3
    assert abs(Y.mean() - 0.5) < rtol, "mean: %f" % Y.mean()
    assert Y.min() >= 0.0, "min: %f" % Y.min()
    assert Y.min() - 0.0 < rtol, "min: %f" % Y.min()
    assert Y.max() <= 1.0, "max: %f" % Y.max()
    assert 1.0 - Y.max() < rtol, "max: %f" % Y.max()

    Y = np.empty_like(X)
    out = op.rand_uniform_like(X, out=Y)
    assert out is Y
    assert abs(Y.mean() - 0.5) < rtol, "mean: %f" % Y.mean()
    assert Y.min() >= 0.0, "min: %f" % Y.min()
    assert Y.min() - 0.0 < rtol, "min: %f" % Y.min()
    assert Y.max() <= 1.0, "max: %f" % Y.max()
    assert 1.0 - Y.max() < rtol, "max: %f" % Y.max()

    Xd = op.to_gpu(X)
    Yd = gpuarray.empty_like(Xd)
    Y = op.to_cpu(op.rand_uniform_like(Xd))
    assert abs(Y.mean() - 0.5) < rtol, "mean: %f" % Y.mean()
    assert Y.min() >= 0.0, "min: %f" % Y.min()
    assert Y.min() - 0.0 < rtol, "min: %f" % Y.min()
    assert Y.max() <= 1.0, "max: %f" % Y.max()
    assert 1.0 - Y.max() < rtol, "max: %f" % Y.max()

    out = op.rand_uniform_like(Xd, out=Yd)
    assert out is Yd
    Y = op.to_cpu(Yd)
    assert abs(Y.mean() - 0.5) < rtol, "mean: %f" % Y.mean()
    assert Y.min() >= 0.0, "min: %f" % Y.min()
    assert Y.min() - 0.0 < rtol, "min: %f" % Y.min()
    assert Y.max() <= 1.0, "max: %f" % Y.max()
    assert 1.0 - Y.max() < rtol, "max: %f" % Y.max()