Example #1
def test_neibs_gpu():
    if not cuda.cuda_available:
        raise SkipTest('Optional package cuda disabled')
    for shape, pshape in [((100, 40, 18, 18), (2, 2)),
                          ((100, 40, 6, 18), (3, 2)),
                          ((10, 40, 66, 66), (33, 33)),
                          ((10, 40, 68, 66), (34, 33))
                          ]:

        images = shared(numpy.arange(numpy.prod(shape),
                                     dtype='float32').reshape(shape))
        neib_shape = T.as_tensor_variable(pshape)

        # CPU reference used to check the GPU result below.
        f = function([], images2neibs(images, neib_shape),
                     mode=mode_without_gpu)
        f_gpu = function([], images2neibs(images, neib_shape),
                     mode=mode_with_gpu)
        assert any([isinstance(node.op, GpuImages2Neibs)
                    for node in f_gpu.maker.env.toposort()])
        #print images.get_value(borrow=True)
        neibs = numpy.asarray(f_gpu())
        assert numpy.allclose(neibs, f())
        #print neibs
        g = function([], neibs2images(neibs, neib_shape, images.shape),
                     mode=mode_with_gpu)
        # The CPU graph must not contain the GPU op.
        assert not any([isinstance(node.op, GpuImages2Neibs)
                        for node in f.maker.env.toposort()])
        #print numpy.asarray(g())
        assert numpy.allclose(images.get_value(borrow=True), g())
Example #2
def test_downsample():
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
    ]

    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            if ds[0] > shp[2]:
                continue
            if ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512 columns
            # in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                print "test_downsample", shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), "a")
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu)
                assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()])
                assert numpy.allclose(f(), f2())

                g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_with_gpu)
                g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_without_gpu)
                assert any(
                    [isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()]
                )
                assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()])
                assert numpy.allclose(g(), g2())
Example #3
    def _compile_and_check(self,
                           inputs,
                           outputs,
                           numeric_inputs,
                           cls,
                           excluding=None):
        """This tests the infer_shape method only"""
        mode = self.mode
        if excluding:
            mode = mode.excluding(*excluding)

        outputs_function = theano.function(inputs, outputs, mode=mode)
        shapes_function = theano.function(inputs, [o.shape for o in outputs],
                                          mode=mode)
        #theano.printing.debugprint(shapes_function)
        # Check that the Op is removed from the compiled function.
        topo_shape = shapes_function.maker.env.toposort()
        assert not any(isinstance(t.op, cls) for t in topo_shape)
        topo_out = outputs_function.maker.env.toposort()
        assert any(isinstance(t.op, cls) for t in topo_out)
        # Check that the shape produced agrees with the actual shape.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            assert numpy.all(out.shape == shape)
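For context, a minimal usage sketch of this helper. The test class, Op, and shapes below are illustrative assumptions, not taken from the snippet; the base class is assumed to be theano.tests.unittest_tools.InferShapeTester.

import numpy
import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt   # assumed home of the helper


class TestSumInferShape(utt.InferShapeTester):
    def test_sum_infer_shape(self):
        x = T.matrix('x')
        xval = numpy.random.rand(4, 5).astype(theano.config.floatX)
        # Compiles both the real graph and a shape-only graph, checks that
        # Sum is absent from the shape graph but present in the real one,
        # and compares the inferred shapes with the computed output shapes.
        self._compile_and_check([x], [x.sum(axis=0)], [xval],
                                T.elemwise.Sum)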
Example #4
def test_default_conv():
    """Just test that we introduce the right GPU convolution
    version.

    """
    img = theano.tensor.ftensor4()
    fil = theano.tensor.ftensor4()

    c = theano.tensor.nnet.conv2d(img, fil)
    f = theano.function([img, fil], c, mode=theano_mode)

    if cuda.dnn.dnn_available():
        assert any([isinstance(a.op, GpuDnnConv)
                    for a in f.maker.fgraph.apply_nodes])
    else:
        assert any([isinstance(a.op, cuda.blas.GpuCorrMM)
                    for a in f.maker.fgraph.apply_nodes])

    mode = theano_mode.excluding('local_conv_dnn', 'local_conv_gemm')
    f = theano.function([img, fil], c, mode=mode)

    assert any([isinstance(a.op, cuda.blas.GpuConv)
                for a in f.maker.fgraph.apply_nodes])

    mode = theano_mode.excluding('conv_dnn', 'conv_gemm')
    f = theano.function([img, fil], c, mode=mode)

    assert any([isinstance(a.op, cuda.blas.GpuConv)
                for a in f.maker.fgraph.apply_nodes])
Example #5
def test_pooling_opt():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    f = theano.function([x],
                        max_pool_2d(x, ds=(2, 2), ignore_border=True),
                        mode=mode_with_gpu)

    assert any([
        isinstance(n.op, cuda.dnn.GpuDnnPool)
        for n in f.maker.fgraph.toposort()
    ])

    f = theano.function([x],
                        T.grad(
                            max_pool_2d(x, ds=(2, 2),
                                        ignore_border=True).sum(), x),
                        mode=mode_with_gpu.including("cudnn"))

    assert any([
        isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
        for n in f.maker.fgraph.toposort()
    ])
Example #6
def test_gpu_opt():
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)

    f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = multinomial.MultinomialFromUniform('auto')(r, u)
    assert m.dtype == 'float32', m.dtype
    m_gpu = cuda.gpu_from_host(m)

    f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
    assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()])
    pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4))+0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = numpy.ones_like(pval[:, 0]) * 0.5
    mval2 = f(pval, uval)
Example #7
def test_default_conv():
    """Just test that we introduce the right GPU convolution
    version.

    """
    img = theano.tensor.ftensor4()
    fil = theano.tensor.ftensor4()

    c = theano.tensor.nnet.conv2d(img, fil)
    f = theano.function([img, fil], c, mode=theano_mode)

    if cuda.dnn.dnn_available():
        assert any(
            [isinstance(a.op, GpuDnnConv) for a in f.maker.fgraph.apply_nodes])
    else:
        assert any([
            isinstance(a.op, cuda.blas.GpuCorrMM)
            for a in f.maker.fgraph.apply_nodes
        ])

    mode = theano_mode.excluding('local_conv_dnn', 'local_conv_gemm')
    f = theano.function([img, fil], c, mode=mode)

    assert any([
        isinstance(a.op, cuda.blas.GpuConv) for a in f.maker.fgraph.apply_nodes
    ])

    mode = theano_mode.excluding('conv_dnn', 'conv_gemm')
    f = theano.function([img, fil], c, mode=mode)

    assert any([
        isinstance(a.op, cuda.blas.GpuConv) for a in f.maker.fgraph.apply_nodes
    ])
Example #8
def test_neibs_gpu():
    if not cuda.cuda_available:
        raise SkipTest('Optional package cuda disabled')
    for shape, pshape in [((100, 40, 18, 18), (2, 2)),
                          ((100, 40, 6, 18), (3, 2)),
                          ((10, 40, 66, 66), (33, 33)),
                          ((10, 40, 68, 66), (34, 33))]:

        images = shared(
            numpy.arange(numpy.prod(shape), dtype='float32').reshape(shape))
        neib_shape = T.as_tensor_variable(pshape)

        # CPU reference used to check the GPU result below.
        f = function([], images2neibs(images, neib_shape),
                     mode=mode_without_gpu)
        f_gpu = function([],
                         images2neibs(images, neib_shape),
                         mode=mode_with_gpu)
        assert any([
            isinstance(node.op, GpuImages2Neibs)
            for node in f_gpu.maker.env.toposort()
        ])
        #print images.get_value(borrow=True)
        neibs = numpy.asarray(f_gpu())
        assert numpy.allclose(neibs, f())
        #print neibs
        g = function([],
                     neibs2images(neibs, neib_shape, images.shape),
                     mode=mode_with_gpu)
        # The CPU graph must not contain the GPU op.
        assert not any([
            isinstance(node.op, GpuImages2Neibs)
            for node in f.maker.env.toposort()
        ])
        #print numpy.asarray(g())
        assert numpy.allclose(images.get_value(borrow=True), g())
Example #9
def test_local_sampling_dot_csr():
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    mode = theano.compile.mode.get_default_mode()
    mode = mode.including("specialize", "local_sampling_dot_csr")

    for sp_format in ['csr']:  # Not implemented for other format
        inputs = [
            tensor.matrix(),
            tensor.matrix(),
            getattr(theano.sparse, sp_format + '_matrix')()
        ]

        f = theano.function(inputs, sparse.sampling_dot(*inputs), mode=mode)

        if theano.config.blas.ldflags:
            assert not any(
                isinstance(node.op, sparse.SamplingDot)
                for node in f.maker.fgraph.toposort())
        else:
            # SamplingDotCSR's C implementation needs blas, so it should not
            # be inserted
            assert not any(
                isinstance(node.op, sparse.opt.SamplingDotCSR)
                for node in f.maker.fgraph.toposort())
Example #10
    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls,
                           excluding=None, warn=True, check_topo=True):
        """This tests the infer_shape method only

        When testing with input values with shapes that take the same
        value over different dimensions (for instance, a square
        matrix, or a tensor3 with shape (n, n, n), or (m, n, m)), it
        is not possible to detect if the output shape was computed
        correctly, or if some shapes with the same value have been
        mixed up. For instance, if the infer_shape uses the width of a
        matrix instead of its height, then testing with only square
        matrices will not detect the problem. If warn=True, we emit a
        warning when testing with such values.

        :param check_topo: If True, we check that the Op was removed
            from the graph. False is useful to test the not-implemented case.

        """
        mode = self.mode
        if excluding:
            mode = mode.excluding(*excluding)
        if warn:
            for var, inp in zip(inputs, numeric_inputs):
                if isinstance(inp, (int, float, list, tuple)):
                    inp = var.type.filter(inp)
                if not hasattr(inp, "shape"):
                    continue
                # Remove broadcastable dims: they are guaranteed to be 1,
                # so they cannot contribute to the repeated-dimension problem.
                if hasattr(var.type, "broadcastable"):
                    shp = [inp.shape[i] for i in range(inp.ndim)
                           if not var.type.broadcastable[i]]
                else:
                    shp = inp.shape
                if len(set(shp)) != len(shp):
                    _logger.warn(
                        "While testing the shape inference, we received an"
                        " input with a shape that has some repeated values: %s"
                        ", like a square matrix. This makes it impossible to"
                        " check if the values for these dimensions have been"
                        " correctly used, or if they have been mixed up.",
                        str(inp.shape))
                    break

        outputs_function = theano.function(inputs, outputs, mode=mode)
        shapes_function = theano.function(inputs, [o.shape for o in outputs],
                                          mode=mode)
        #theano.printing.debugprint(shapes_function)
        # Check that the Op is removed from the compiled function.
        if check_topo:
            topo_shape = shapes_function.maker.fgraph.toposort()
            assert not any(isinstance(t.op, cls) for t in topo_shape)
        topo_out = outputs_function.maker.fgraph.toposort()
        assert any(isinstance(t.op, cls) for t in topo_out)
        # Check that the shape produced agrees with the actual shape.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            assert numpy.all(out.shape == shape)
Example #11
 def _get_kernel_flags(self, *dtypes):
     dtypes = [numpy.dtype(d) for d in dtypes]
     flags = ['GA_USE_CLUDA']
     if any(d == numpy.float64 for d in dtypes):
         flags.append('GA_USE_DOUBLE')
     if any(d.itemsize < 4 for d in dtypes):
         flags.append('GA_USE_SMALL')
     return '|'.join(flags)
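The flag logic above is easy to check in isolation; a small sketch (the standalone harness class name is made up for illustration):

import numpy


class _FlagDemo(object):
    # Same logic as the method above, copied into a standalone class so the
    # expected outputs can be shown directly.
    def _get_kernel_flags(self, *dtypes):
        dtypes = [numpy.dtype(d) for d in dtypes]
        flags = ['GA_USE_CLUDA']
        if any(d == numpy.float64 for d in dtypes):
            flags.append('GA_USE_DOUBLE')
        if any(d.itemsize < 4 for d in dtypes):
            flags.append('GA_USE_SMALL')
        return '|'.join(flags)


demo = _FlagDemo()
# float32 only: just the CLUDA baseline.
assert demo._get_kernel_flags('float32') == 'GA_USE_CLUDA'
# Any float64 dtype adds the double-precision flag.
assert demo._get_kernel_flags('float32', 'float64') == 'GA_USE_CLUDA|GA_USE_DOUBLE'
# Any dtype narrower than 4 bytes (here int16) adds the small-dtype flag.
assert demo._get_kernel_flags('int16') == 'GA_USE_CLUDA|GA_USE_SMALL'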
Example #13
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    """
    This is basic test for GpuCrossentropySoftmax1HotWithBiasDx

    We check that we loop when their is too much threads
    TODO: check that we loop when their is too much block(>32*1024)

    """
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = numpy.random.rand(batch_size, n_out).astype("float32")
    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype="float32")
    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)

    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
        (softmax_output.shape[0], 1))
    op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(dnll_value, softmax_output, y_idx_value)

    cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
    # theano.printing.debugprint(cpu_f)
    # theano.printing.debugprint(gpu_f)

    assert any(
        [isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx) for node in cpu_f.maker.fgraph.toposort()]
    )
    assert any(
        [isinstance(node.op, cuda.nnet.GpuCrossentropySoftmax1HotWithBiasDx) for node in gpu_f.maker.fgraph.toposort()]
    )

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    if not numpy.allclose(cpu_out, gpu_out, rtol=rtol, atol=atol):
        abs_err, rel_err = T.numeric_grad.abs_rel_err(cpu_out, gpu_out)
        scaled_err = numpy.minimum(abs_err / atol, rel_err / rtol)
        max_i = scaled_err.argmax()

        print "max err index:", max_i, max_i / batch_size,
        print max_i % batch_size, max_i / n_out, max_i % n_out
        print "At that index:"
        print "err:", scaled_err.flatten()[max_i]
        print "absolute error:", abs_err.flatten()[max_i]
        print "relative error:", rel_err.flatten()[max_i]
        print "cpu_out:", cpu_out.flatten()[max_i]
        print "gpu_out:", gpu_out.flatten()[max_i]
        print "softmax_output_value:", softmax_output_value.flatten()[max_i]
        print "dnll_value:", dnll_value[max_i / n_out]
        print "y_idx_value:", y_idx_value[max_i / n_out]

        assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (rtol, atol)
Example #14
def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
            iters=10, order='C'):
    """
    :param execute: If True, execute a Theano function that should call gemm.
    :param verbose: If True, will print some Theano flags and env variables.
    :param M,N,K: The M,N,K size used by gemm.
    :param iters: The number of calls to gemm to do.

    :return: a tuple (execution time,
                      str that represents the implementation used)
    """

    a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
                                 order=order))
    b = theano.shared(numpy.ones((N, K), dtype=theano.config.floatX,
                                 order=order))
    c = theano.shared(numpy.ones((M, K), dtype=theano.config.floatX,
                                 order=order))
    f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})

    if verbose:
        print 'Some Theano flags:'
        print '    blas.ldflags=', theano.config.blas.ldflags
        print '    compiledir=', theano.config.compiledir
        print '    floatX=', theano.config.floatX
        print 'Some environment variables:'
        print '    MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
        print '    OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
        print '    GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
        print
        print ('Numpy config: (used when the Theano flag'
               ' "blas.ldflags" is empty)')
        numpy.show_config()
        print 'Numpy dot module:', numpy.dot.__module__
        print 'Numpy location:', numpy.__file__
        print 'Numpy version:', numpy.__version__
        print
    t0 = 0
    t1 = -1

    if any([x.op.__class__.__name__ == 'Gemm' for x in
            f.maker.env.toposort()]):
        impl = 'cpu'
    elif any([x.op.__class__.__name__ == 'GpuGemm' for x in
              f.maker.env.toposort()]):
        impl = 'gpu'
    else:
        impl = 'ERROR, unable to tell if Theano used the cpu or the gpu:\n'
        impl += str(f.maker.env.toposort())

    if execute:
        t0 = time.time()
        for i in range(iters):
            f()
        t1 = time.time()
    return t1 - t0, impl
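A hypothetical driver for this benchmark (the GFLOP/s arithmetic assumes the usual 2*M*N*K flops per gemm call; none of it comes from the snippet above):

if __name__ == '__main__':
    M = N = K = 2000
    iters = 10
    t, impl = execute(execute=True, verbose=False, M=M, N=N, K=K, iters=iters)
    # A gemm of these sizes does about 2*M*N*K floating point operations.
    gflops = 2.0 * M * N * K * iters / (t * 1e9)
    print 'Implementation:', impl
    print 'Total time: %.2fs (~%.1f GFLOP/s)' % (t, gflops)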
Example #15
def execute(execute=True, verbose=True):

    a = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
    b = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
    c = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))

    f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})

    if verbose:
        print 'Some theano flags:'
        print '    blas.ldflags=', theano.config.blas.ldflags
        print '    compiledir=', theano.config.compiledir
        print '    floatX=', theano.config.floatX
        print 'Some env flags:'
        print '    MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
        print '    OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
        print '    GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
        print
        print(
            'Numpy config: (used when the theano flags'
            ' "blas.ldflags" is empty)')
        numpy.show_config()
        print 'Numpy dot module:', numpy.dot.__module__
        print 'Numpy file location that was loaded:', numpy.__file__
        print 'Numpy version:', numpy.__version__
        print
        if any([
                x.op.__class__.__name__ == 'Gemm'
                for x in f.maker.env.toposort()
        ]):
            print 'Used the cpu'
        elif any([
                x.op.__class__.__name__ == 'GpuGemm'
                for x in f.maker.env.toposort()
        ]):
            print 'Used the gpu'
        else:
            print 'ERROR, not able to tell if theano used the cpu or the gpu'
            print f.maker.env.toposort()
    t0 = 0
    t1 = -1

    if execute:
        t0 = time.time()
        for i in range(iters):
            f()
        t1 = time.time()
    if verbose and execute:
        print
        print 'This execution time took %.2fs' % (t1 - t0)
        print
        print(
            'Try to run this script a few times. Experience shows that'
            ' the first run is not as fast as the following calls. The'
            ' difference is not big, but it is consistent.')
    return t1 - t0
Example #16
    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls,
                           excluding=None, warn=True):
        """This tests the infer_shape method only

        When testing with input values with shapes that take the same
        value over different dimensions (for instance, a square
        matrix, or a tensor3 with shape (n, n, n), or (m, n, m)), it
        is not possible to detect if the output shape was computed
        correctly, or if some shapes with the same value have been
        mixed up. For instance, if the infer_shape uses the width of a
        matrix instead of its height, then testing with only square
        matrices will not detect the problem. If warn=True, we emit a
        warning when testing with such values.

        """
        mode = self.mode
        if excluding:
            mode = mode.excluding(*excluding)
        if warn:
            for var, inp in zip(inputs, numeric_inputs):
                if isinstance(inp, (int, float, list, tuple)):
                    inp = var.type.filter(inp)
                if not hasattr(inp, "shape"):
                    continue
                # Remove broadcastable dims: they are guaranteed to be 1,
                # so they cannot contribute to the repeated-dimension problem.
                if hasattr(var.type, "broadcastable"):
                    shp = [inp.shape[i] for i in range(inp.ndim)
                           if not var.type.broadcastable[i]]
                else:
                    shp = inp.shape
                if len(set(shp)) != len(shp):
                    _logger.warn(
                        "While testing the shape inference, we received an"
                        " input with a shape that has some repeated values: %s"
                        ", like a square matrix. This makes it impossible to"
                        " check if the values for these dimensions have been"
                        " correctly used, or if they have been mixed up.",
                        str(inp.shape))
                    break

        outputs_function = theano.function(inputs, outputs, mode=mode)
        shapes_function = theano.function(inputs, [o.shape for o in outputs],
                                          mode=mode)
        #theano.printing.debugprint(shapes_function)
        # Check that the Op is removed from the compiled function.
        topo_shape = shapes_function.maker.fgraph.toposort()
        assert not any(isinstance(t.op, cls) for t in topo_shape)
        topo_out = outputs_function.maker.fgraph.toposort()
        assert any(isinstance(t.op, cls) for t in topo_out)
        # Check that the shape produced agrees with the actual shape.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            assert numpy.all(out.shape == shape)
Example #17
def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order="C"):
    """
    :param execute: If True, execute a Theano function that should call gemm.
    :param verbose: If True, will print some Theano flags and env variables.
    :param M,N,K: The M,N,K size used by gemm.
    :param iters: The number of calls to gemm to do.

    :return: a tuple (execution time,
                      str that represents the implementation used)
    """

    if verbose:
        print "Some Theano flags:"
        print "    blas.ldflags=", theano.config.blas.ldflags
        print "    compiledir=", theano.config.compiledir
        print "    floatX=", theano.config.floatX
        print "Some environment variables:"
        print "    MKL_NUM_THREADS=", os.getenv("MKL_NUM_THREADS")
        print "    OMP_NUM_THREADS=", os.getenv("OMP_NUM_THREADS")
        print "    GOTO_NUM_THREADS=", os.getenv("GOTO_NUM_THREADS")
        print
        print ("Numpy config: (used when the Theano flag" ' "blas.ldflags" is empty)')
        numpy.show_config()
        print "Numpy dot module:", numpy.dot.__module__
        print "Numpy location:", numpy.__file__
        print "Numpy version:", numpy.__version__
        print

    a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX, order=order))
    b = theano.shared(numpy.ones((N, K), dtype=theano.config.floatX, order=order))
    c = theano.shared(numpy.ones((M, K), dtype=theano.config.floatX, order=order))
    f = theano.function([], updates={c: 0.4 * c + 0.8 * T.dot(a, b)}, mode=theano.compile.ProfileMode())

    if any([x.op.__class__.__name__ == "Gemm" for x in f.maker.env.toposort()]):
        c_impl = f.profile.apply_cimpl.values()
        assert len(c_impl) == 1
        if c_impl[0]:
            impl = "CPU (with direct Theano binding to blas)"
        else:
            impl = "CPU (without direct Theano binding to blas but with numpy/scipy binding to blas)"
    elif any([x.op.__class__.__name__ == "GpuGemm" for x in f.maker.env.toposort()]):
        impl = "GPU"
    else:
        impl = "ERROR, unable to tell if Theano used the cpu or the gpu:\n"
        impl += str(f.maker.env.toposort())

    t0 = 0
    t1 = -1

    if execute:
        t0 = time.time()
        for i in range(iters):
            f()
        t1 = time.time()
    return t1 - t0, impl
Example #18
def execute(execute=True, verbose=True):

    a = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
    b = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
    c = theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))

    f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})

    if verbose:
        print 'Some theano flags:'
        print '    blas.ldflags=', theano.config.blas.ldflags
        print '    compiledir=', theano.config.compiledir
        print '    floatX=', theano.config.floatX
        print 'Some env flags:'
        print '    MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
        print '    OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
        print '    GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
        print
        print ('Numpy config: (used when the theano flags'
               ' "blas.ldflags" is empty)')
        numpy.show_config()
        print 'Numpy dot module:', numpy.dot.__module__
        print 'Numpy file location that was loaded:', numpy.__file__
        print 'Numpy version:', numpy.__version__
        print
        if any([x.op.__class__.__name__ == 'Gemm' for x in
                f.maker.env.toposort()]):
            print 'Used the cpu'
        elif any([x.op.__class__.__name__ == 'GpuGemm' for x in
                  f.maker.env.toposort()]):
            print 'Used the gpu'
        else:
            print 'ERROR, not able to tell if theano used the cpu or the gpu'
            print f.maker.env.toposort()
    t0 = 0
    t1 = -1

    if execute:
        t0 = time.time()
        for i in range(iters):
            f()
        t1 = time.time()
    if verbose and execute:
        print
        print 'This execution time took %.2fs' % (t1 - t0)
        print
        print ('Try to run this script a few times. Experience shows that'
               ' the first run is not as fast as the following calls. The'
               ' difference is not big, but it is consistent.')
    return t1 - t0
Example #19
 def _compile_and_check(self, inputs, outputs, numeric_inputs, cls):
     outputs_function = theano.function(inputs, outputs, mode=self.mode)
     shapes_function = theano.function(inputs, [o.shape for o in outputs],
             mode=self.mode)
     #theano.printing.debugprint(shapes_function)
     # Check that the Op is removed from the compiled function.
     topo_shape = shapes_function.maker.env.toposort()
     assert not any(isinstance(t.op, cls) for t in topo_shape)
     topo_out = outputs_function.maker.env.toposort()
     assert any(isinstance(t.op, cls) for t in topo_out)
     # Check that the shape produced agrees with the actual shape.
     numeric_outputs = outputs_function(*numeric_inputs)
     numeric_shapes = shapes_function(*numeric_inputs)
     for out, shape in zip(numeric_outputs, numeric_shapes):
         assert numpy.all(out.shape == shape)
Example #20
def _toposort(edges):
    """ Topological sort algorithm by Kahn [1] - O(nodes + vertices)

    inputs:
        edges - a dict of the form {a: {b, c}} where b and c depend on a
    outputs:
        L - an ordered list of nodes that satisfy the dependencies of edges

    >>> _toposort({1: {2, 3}, 2: (3, )})
    [1, 2, 3]

    Closely follows the wikipedia page [2]

    [1] Kahn, Arthur B. (1962), "Topological sorting of large networks",
    Communications of the ACM
    [2] http://en.wikipedia.org/wiki/Toposort#Algorithms
    """
    incoming_edges = reverse_dict(edges)
    incoming_edges = dict((k, set(val)) for k, val in incoming_edges.items())
    S = set((v for v in edges if v not in incoming_edges))
    L = []

    while S:
        n = S.pop()
        L.append(n)
        for m in edges.get(n, ()):
            assert n in incoming_edges[m]
            incoming_edges[m].remove(n)
            if not incoming_edges[m]:
                S.add(m)
    if any(incoming_edges.get(v, None) for v in edges):
        raise ValueError("Input has cycles")
    return L
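_toposort relies on a reverse_dict helper that is not shown; a minimal sketch consistent with the way it is used here (mapping each node to the tuple of nodes that point to it) would be:

def reverse_dict(d):
    """Reverse a dependency dict: {a: (b, c)} becomes {b: (a,), c: (a,)}.

    This is only a sketch of the helper assumed by _toposort above; the
    real implementation may differ in detail.
    """
    result = {}
    for key, vals in d.items():
        for val in vals:
            result[val] = result.get(val, ()) + (key,)
    return result

# With such a helper, the doctest above holds:
#     _toposort({1: {2, 3}, 2: (3,)}) == [1, 2, 3]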
Example #21
    def body(mode, gpu):
        #the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)
        if gpu:
            assert any([
                type(node.op) is multinomial.GpuMultinomialFromUniform
                for node in f.maker.fgraph.toposort()
            ])

        # test that both first and second samples can be drawn
        assert numpy.allclose(f([[1, 0], [0, 1]], [.1, .1]), [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.31, .31])
        assert numpy.allclose(r, [[0, 2], [0, 2]]), r

        # test that both first labels can be drawn
        r = f([[.2, .8], [.3, .7]], [.21, .21])
        assert numpy.allclose(r, [[0, 2], [2, 0]]), r

        #change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[.2, .8]], [.25])
        assert numpy.allclose(r, [[0, 2]]), r
Example #22
    def body(mode, gpu):
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = multinomial.MultinomialFromUniform('auto')(p,u)
        f = function([p,u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
            assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.env.toposort()])

        pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
        pval = pval / pval.sum(axis=1)[:,None]
        uval = numpy.ones_like(pval[:,0]) * 0.5
        mval = f(pval,uval)

        assert mval.shape == pval.shape
        if config.cast_policy == 'custom':
            assert mval.dtype == pval.dtype
        elif config.cast_policy == 'numpy+floatX':
            assert mval.dtype == config.floatX
        elif config.cast_policy == 'numpy':
            assert mval.dtype == 'float64'
        else:
            raise NotImplementedError(config.cast_policy)
        assert numpy.allclose(mval.sum(axis=1), 2)
        asdf = numpy.asarray([0, 0, 2, 0])+0*pval
        assert numpy.allclose(mval, asdf) #broadcast over all rows
Example #23
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b, c], [],
                  updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
                  mode=mode_with_gpu)
        assert any([
            node.op == tcn.blas.gpu_gemm_inplace
            for node in f.maker.fgraph.toposort()
        ])

        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0], b_shp[1])
        f(bval, cval)

        assert numpy.allclose(
            numpy.dot(a0, bval) + numpy.exp(cval), a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True,
                                return_internal_type=True)[::-1, ::-1],
                    borrow=True)
        f(bval, cval)
Example #24
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, apply_cimpl, message, outputs_size,
                    other_time):
    # Scan overhead profile
    if any([isinstance(node.op, Scan) and v > 0 for (_, node), v in
            apply_time.items()]):
        print
        print 'Scan overhead:'
        print ('<Scan op time(s)> <sub scan fct time(s)> <sub scan op '
               'time(s)> <sub scan fct time(% scan op time)> <sub scan '
               'op time(% scan op time)> <node>')
        total_super_scan_time = 0
        total_scan_fct_time = 0
        total_scan_op_time = 0
        for (_, node), v in apply_time.items():
            if isinstance(node.op, Scan):
                if v > 0:
                    scan_fct_time = node.op.mode_instance.fn_time
                    scan_op_time = node.op.mode_instance.local_time
                    total_super_scan_time += v
                    total_scan_fct_time += scan_fct_time
                    total_scan_op_time += scan_op_time
                    print '    %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
                        v, scan_fct_time, scan_op_time,
                        scan_fct_time / v * 100, scan_op_time / v * 100), node
                else:
                    print (' The node took 0s, so we can not compute the '
                           'overhead'), node
        print '    total %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
            total_super_scan_time, total_scan_fct_time, total_scan_op_time,
            total_scan_fct_time / total_super_scan_time * 100,
            total_scan_op_time / total_super_scan_time * 100)
Example #25
    def test_neibs(self):
        for shape, pshape in [((100, 40, 18, 18), (2, 2)),
                              ((100, 40, 6, 18), (3, 2)),
                              ((10, 40, 66, 66), (33, 33)),
                              ((10, 40, 68, 66), (34, 33))]:
            for border in ['valid', 'ignore_borders']:
                for dtype in self.dtypes:
                    images = shared(
                        numpy.arange(numpy.prod(shape),
                                     dtype=dtype).reshape(shape))
                    neib_shape = T.as_tensor_variable(pshape)

                    f = function([],
                                 images2neibs(images, neib_shape, mode=border),
                                 mode=self.mode)

                    #print images.get_value(borrow=True)
                    neibs = f()
                    #print neibs
                    g = function([],
                                 neibs2images(neibs, neib_shape, images.shape),
                                 mode=self.mode)
                    if border in ['valid']:
                        assert any([isinstance(node.op, self.op)
                                    for node in f.maker.fgraph.toposort()])

                    #print g()
                    assert numpy.allclose(images.get_value(borrow=True), g())
Example #26
def test_elemwise_composite_float64():
    # Test that we don't fuse composite elemwise ops with float64 somewhere
    # inside: by default nvcc downcasts float64 to float32. We would need to
    # tell it not to do so, but that is possible only on some devices.
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        l = []
        for i in composite_op.fgraph.toposort():
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                l.append(i)
        return l
    for mode in [mode_with_gpu, mode_with_gpu.excluding('gpu_after_fusion'),
                 mode_with_gpu.excluding('elemwise_fusion')]:
        f = pfunc([a, b],
                  tensor.cast(tensor.lt(tensor.cast(a, 'float64') ** 2,
                                               b),
                                     'float32'), mode=mode)

        out = f(av, bv)
        assert numpy.all(out == ((av ** 2) < bv))
        for node in f.maker.fgraph.toposort():
            if isinstance(node.op, cuda.GpuElemwise):
                if isinstance(node.op.scalar_op, theano.scalar.Composite):
                    scals = get_all_basic_scalar(node.op.scalar_op)
                    for s in scals:
                        assert not any([i.type.dtype == 'float64'
                                        for i in s.inputs + s.outputs])
Example #28
    def multinomial(self, size=None, n=1, pvals=None, ndim=None, dtype='int64',
                    nstreams=None):
        """
        Sample `n` (currently `n` needs to be 1) times from a multinomial
        distribution defined by probabilities pvals.

        Example : pvals = [[.98, .01, .01], [.01, .98, .01]] will
        probably result in [[1,0,0],[0,1,0]].

        .. note::
            `size` and `ndim` are only there to keep the same signature as
            `uniform`, `binomial`, `normal`, etc.
            TODO: adapt multinomial to take them into account.
        """
        if pvals is None:
            raise TypeError("You have to specify pvals")
        pvals = as_tensor_variable(pvals)
        if size is not None:
            if any([isinstance(i, int) and i <= 0 for i in size]):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
                    size)

        if n == 1 and pvals.ndim == 2:
            ndim, size, bcast = raw_random._infer_ndim_bcast(
                    ndim, size, pvals[:,0])
            assert ndim==1
            bcast = bcast+(pvals.type.broadcastable[-1],)
            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
            op = multinomial.MultinomialFromUniform(dtype)
            return op(pvals, unis)
        else:
            raise NotImplementedError(("MRG_RandomStreams.multinomial only"
                " implemented with n == 1 and pvals.ndim = 2"))
Example #29
    def test_neibs_manual(self):
        shape = (2, 3, 4, 4)
        for dtype in self.dtypes:
            images = shared(
                numpy.arange(numpy.prod(shape), dtype=dtype).reshape(shape))
            neib_shape = T.as_tensor_variable((2, 2))

            for border in ['valid', 'ignore_borders']:
                f = function([],
                             images2neibs(images, neib_shape, mode=border),
                             mode=self.mode)
                assert any([
                    isinstance(node.op, self.op)
                    for node in f.maker.fgraph.toposort()
                ])

                #print images.get_value(borrow=True)
                neibs = f()
                #print neibs
                assert numpy.allclose(
                    neibs,
                    [[0, 1, 4, 5], [2, 3, 6, 7], [8, 9, 12, 13],
                     [10, 11, 14, 15], [16, 17, 20, 21], [18, 19, 22, 23],
                     [24, 25, 28, 29], [26, 27, 30, 31], [32, 33, 36, 37],
                     [34, 35, 38, 39], [40, 41, 44, 45], [42, 43, 46, 47],
                     [48, 49, 52, 53], [50, 51, 54, 55], [56, 57, 60, 61],
                     [58, 59, 62, 63], [64, 65, 68, 69], [66, 67, 70, 71],
                     [72, 73, 76, 77], [74, 75, 78, 79], [80, 81, 84, 85],
                     [82, 83, 86, 87], [88, 89, 92, 93], [90, 91, 94, 95]])
                g = function([],
                             neibs2images(neibs, neib_shape, images.shape),
                             mode=self.mode)

                assert numpy.allclose(images.get_value(borrow=True), g())
Example #30
def test_elemwise_composite_float64():
    # Test that we don't fuse composite elemwise ops with float64 somewhere
    # inside: by default nvcc downcasts float64 to float32. We would need to
    # tell it not to do so, but that is possible only on some devices.
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        l = []
        for i in composite_op.env.toposort():
            if isinstance(i, theano.scalar.Composite):
                l += get_all_basic_scalar(i)
            else:
                l.append(i)
        return l
    for mode in [mode_with_gpu, mode_with_gpu.excluding('gpu_after_fusion'),
                 mode_with_gpu.excluding('elemwise_fusion')]:
        f = pfunc([a, b],
                  tensor.cast(tensor.lt(tensor.cast(a, 'float64') ** 2,
                                               b),
                                     'float32'), mode=mode)

        out = f(av, bv)
        assert numpy.all(out == ((av ** 2) < bv))
        for node in f.maker.env.toposort():
            if isinstance(node.op, cuda.GpuElemwise):
                if isinstance(node.op.scalar_op, theano.scalar.Composite):
                    scals = get_all_basic_scalar(node.op.scalar_op)
                    for s in scals:
                        assert not any([i.type.dtype == 'float64'
                                        for i in s.inputs + s.outputs])
Example #31
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc(
                [b, b2],
                [tensor.dot(a, b2) + c],
                updates=[(a, tensor.dot(a, b) + c)],
                mode=mode_with_gpu)

        assert any([node.op == tcn.blas.gpu_gemm_no_inplace
            for node in f.maker.env.toposort()])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
                a.get_value(borrow=True,
                    return_internal_type=True)[::-1, ::-1],
                borrow=True)
        f(bval, bval2)
Example #32
def list_of_nodes(inputs, outputs):
    """ Return the apply nodes of the graph between inputs and outputs """
    return stack_search(
            deque([o.owner for o in outputs]),
            lambda o: [inp.owner for inp in o.inputs
                           if inp.owner
                           and not any(i in inp.owner.outputs for i in inputs)])
Example #33
def local_gpu_multinomial(node):
    if type(node.op) is MultinomialFromUniform:
        p, u = node.inputs
        m, = node.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32' and any([
                i.owner
                and isinstance(i.owner.op, theano.sandbox.cuda.HostFromGpu)
                for i in node.inputs
        ])):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
            return [
                host_from_gpu(gpu_op(*[gpu_from_host(i)
                                       for i in node.inputs])).T
            ]
    if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost)
            and node.inputs[0].owner
            and type(node.inputs[0].owner.op) is MultinomialFromUniform):
        multi = node.inputs[0].owner
        p, u = multi.inputs
        m, = multi.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32'):
            gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
            ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
            # The dimshuffle is on the cpu, but will be moved to the gpu by an opt.
            return [gpu_from_host(ret)]
Example #34
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        f = pfunc([b, b2], [tensor.dot(a, b2) + c],
                  updates=[(a, tensor.dot(a, b) + c)],
                  mode=mode_with_gpu)

        assert any([
            node.op == tcn.blas.gpu_gemm_no_inplace
            for node in f.maker.env.toposort()
        ])
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(a.get_value(borrow=True,
                                return_internal_type=True)[::-1, ::-1],
                    borrow=True)
        f(bval, bval2)
Example #36
        def local_opt(node):
            if type(node.op) in OP:

                # Either one of our inputs is on the gpu or
                # all of our clients are on the gpu
                if (any([
                        i.owner and i.owner.op == host_from_gpu
                        for i in node.inputs
                ]) or all([
                        c != 'output' and c.op == gpu_from_host
                        for c, idx in node.outputs[0].clients
                ])):
                    new_op = maker(node)
                    # This is needed as sometimes new_op inherits from OP.
                    if new_op and new_op != node.op:
                        if isinstance(new_op, theano.Op):
                            return [
                                safe_to_cpu(o)
                                for o in new_op(*node.inputs, return_list=True)
                            ]
                        elif isinstance(new_op, (tuple, list)):
                            return [safe_to_cpu(o) for o in new_op]
                        else:  # suppose it is a variable on the GPU
                            return [host_from_gpu(new_op)]
            return False
Example #37
    def cmp(a_shp, b_shp):
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')

        b = tensor.fmatrix('b')
        c = tensor.fmatrix('c')

        f = pfunc([b, c], [], updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
                mode=mode_with_gpu)
        assert any([node.op == tcn.blas.gpu_gemm_inplace
            for node in f.maker.fgraph.toposort()])

        bval = my_rand(*b_shp)
        cval = my_rand(a_shp[0], b_shp[1])
        f(bval, cval)

        assert numpy.allclose(numpy.dot(a0, bval) + numpy.exp(cval),
                a.get_value())

        # Try with a matrix equal to a0, but with strides in both dims
        a.set_value(a0)
        a.set_value(
                a.get_value(borrow=True,
                    return_internal_type=True)[::-1, ::-1],
                borrow=True)
        f(bval, cval)
Example #38
    def test_logical_shapes(self):
        seed_rng()
        for stride in range(1, 4):
            kshp = (10, 2, 10, 10)
            featshp = (3, 10, 11, 11)

            a = tensor.ftensor4()
            A = tensor.ftensor4()

            # Need to transpose first two dimensions of kernel, and reverse
            # index kernel image dims (for correlation)
            kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

            featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                               featshp[3] * stride)
            kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
            #print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
            image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                                border_mode='full',
                                                image_shape=featshp,
                                                filter_shape=kshp_rotated,
                                                imshp_logical=featshp_logical[1:],
                                                kshp_logical=kshp[2:])

            func = theano.function([a, A], image_estimate, mode=theano_mode)
            #theano.printing.debugprint(func,)
            assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                        for node in func.maker.fgraph.toposort()])

            a_in = numpy.random.randn(*featshp).astype("float32")
            A_in = numpy.random.randn(*kshp).astype("float32")

            func(a_in, A_in)
Example #39
    def test_neibs(self):
        for shape, pshape in [((10, 7, 18, 18), (2, 2)),
                              ((10, 7, 6, 18), (3, 2)),
                              ((5, 7, 66, 66), (33, 33)),
                              ((5, 7, 68, 66), (34, 33))]:
            for border in ['valid', 'ignore_borders']:
                for dtype in self.dtypes:
                    images = shared(
                        numpy.arange(numpy.prod(shape),
                                     dtype=dtype).reshape(shape))
                    neib_shape = T.as_tensor_variable(pshape)

                    f = function([],
                                 images2neibs(images, neib_shape, mode=border),
                                 mode=self.mode)

                    #print images.get_value(borrow=True)
                    neibs = f()
                    #print neibs
                    g = function([],
                                 neibs2images(neibs, neib_shape, images.shape),
                                 mode=self.mode)
                    if border in ['valid']:
                        assert any([
                            isinstance(node.op, self.op)
                            for node in f.maker.fgraph.toposort()
                        ])

                    #print g()
                    assert numpy.allclose(images.get_value(borrow=True), g())
Example #41
def local_gpua_subtensor(node):
    x = node.inputs[0]
    if (x.owner and isinstance(x.owner.op, HostFromGpu)):
        gpu_x = x.owner.inputs[0]
        if (gpu_x.owner and
            isinstance(gpu_x.owner.op, GpuFromHost) and
            # And it is a shared var or an input of the graph.
            not gpu_x.owner.inputs[0].owner):
            if len(x.clients) == 1:
                if any([n == 'output' or any([isinstance(v.type, GpuArrayType)
                                              for v in n.inputs + n.outputs])
                        for n,_  in node.outputs[0].clients]):
                    return
                else:
                    return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
Example #42
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        if (hasattr(theano, 'sandbox') and
            hasattr(theano.sandbox, 'cuda') and
            theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        #initialize the timers
        for i, node in enumerate(ret.maker.fgraph.toposort()):
            profile.apply_time[node] = 0.0

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(
                    ret.fn.thunk_groups[i][0],
                    'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific)  work with ProfileStats
        # objects which are function-specific.

        #capture old fn in closure. This is important since new_fn is about to
        #take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.variable_shape = self.mode.profile_stats[ret].variable_shape
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.variable_shape

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk

        return ret
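
# A minimal, self-contained sketch of the closure-wrapping pattern the comments
# above describe (the names here are illustrative, not Theano API): the current
# callable is captured in a local variable before the attribute is overwritten,
# so the wrapper can still reach the original.
import time


class Runner(object):
    """Stand-in for an object whose .fn attribute gets wrapped."""
    def __init__(self):
        self.fn = lambda: sum(range(10000))


def attach_timer(runner, stats):
    old_fn = runner.fn               # capture the original callable in the closure
    def new_fn():
        t0 = time.time()
        old_fn()                     # calls the captured original, not runner.fn
        stats['total'] = stats.get('total', 0.0) + (time.time() - t0)
    runner.fn = new_fn               # callers now go through the wrapper


stats = {}
r = Runner()
attach_timer(r, stats)
r.fn()
assert 'total' in stats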
Example #43
0
    def compile_args():
        """
        These args will be received by compile_str() in the preargs parameter.
        They will also be included in the "hard" part of the key module.
        """
        flags = [flag for flag in config.nvcc.flags.split(" ") if flag]
        if config.nvcc.fastmath:
            flags.append("-use_fast_math")
        cuda_ndarray_cuh_hash = hash_from_file(os.path.join(os.path.split(__file__)[0], "cuda_ndarray.cuh"))
        flags.append("-DCUDA_NDARRAY_CUH=" + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API. Most of the code was updated to
        # use the new API, but not all of it. When that is finished, enable
        # the following macro to assert that no new code uses the old API.
        flags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # NumPy 1.7 introduced the NPY_ARRAY_* names below; they did not
        # exist in older versions, so define them as aliases of the old
        # names when building against an older NumPy.
        numpy_ver = [int(n) for n in numpy.__version__.split(".")[:2]]
        if numpy_ver < [1, 7]:
            flags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
            flags.append("-D NPY_ARRAY_ALIGNED=NPY_ALIGNED")
            flags.append("-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
            flags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            flags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            flags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")

        # If the user didn't specify architecture flags, add them.
        if not any(["-arch=sm_" in f for f in flags]):
            # We compile cuda_ndarray.cu during import, and we should not add
            # device properties at that time, as the device is not selected
            # yet.
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda

            if hasattr(theano.sandbox, "cuda"):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    _logger.warn(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now."
                    )
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False,
                    )
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append("-arch=sm_" + str(p["major"]) + str(p["minor"]))

        return flags
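
# A small standalone illustration of why the version above is split into a
# list of ints before the `< [1, 7]` check: list comparison is element-wise,
# so two-digit minor versions are ordered correctly, while raw string
# comparison is not.
assert [1, 6] < [1, 7]           # 1.6 is older than 1.7
assert not ([1, 11] < [1, 7])    # 1.11 is correctly treated as newer than 1.7
assert "1.11" < "1.7"            # ...but comparing the raw strings gets this wrong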
Example #44
0
def test_many_arg_elemwise():
    """this test checks whether the + and * elemwise ops can handle extremely large numbers of
    arguments on gpu
    i.e., it is a test of the optimization theano/sandbox/cuda/opt.py:local_gpu_huge_add_or_mul """
    rng = numpy.random.RandomState([1, 2, 3])

    for num_args in [25]:
        for op_to_test in [theano.tensor.add, theano.tensor.mul]:
            for nb_dim in [2, 3, 4, 5]:
                shapes = [rng.randint(1, 5) for i in range(nb_dim)]
                args = [numpy.cast['float32'](rng.randn(*shapes))
                        for arg in xrange(0, num_args)]

                symb_args = [theano.tensor.TensorType('float32',
                                                      (False,)*nb_dim)()
                             for arg in xrange(0, num_args)]


                outputs = []
                for mode in [mode_with_gpu, mode_without_gpu]:
                    # test the optimization local_gpu_elemwise_0
                    f = theano.function(
                        symb_args, op_to_test(*symb_args),
                        mode=mode.excluding("local_gpu_elemwise_1"))
                    outputs.append(f(*args))
                    #assert that the test was done on the gpu.
                    if mode is mode_with_gpu:
                        assert any([isinstance(node.op, cuda.GpuElemwise)
                                    for node in f.maker.env.nodes])

                    # test the optimization local_gpu_elemwise_1
                    f = theano.function(
                        symb_args,
                        cuda.gpu_from_host(op_to_test(*symb_args)),
                        mode=mode.excluding("local_gpu_elemwise_0"))
                    out = f(*args)
                    #assert that the test was done on the gpu.
                    if mode is mode_with_gpu:
                        assert any([isinstance(node.op, cuda.GpuElemwise)
                                    for node in f.maker.env.nodes])
                    assert numpy.allclose(out, outputs[-1])

                results_gpu, results_cpu = outputs

                assert numpy.allclose(results_gpu, results_cpu)
Example #45
0
def test_many_arg_elemwise():
    """this test checks whether the + and * elemwise ops can handle extremely large numbers of
    arguments on gpu
    i.e., it is a test of the optimization theano/sandbox/cuda/opt.py:local_gpu_huge_add_or_mul """
    rng = numpy.random.RandomState([1, 2, 3])

    for num_args in [25]:
        for op_to_test in [theano.tensor.add, theano.tensor.mul]:
            for nb_dim in [2, 3, 4, 5]:
                shapes = [rng.randint(1, 5) for i in range(nb_dim)]
                args = [numpy.cast['float32'](rng.randn(*shapes))
                        for arg in xrange(0, num_args)]

                symb_args = [theano.tensor.TensorType('float32',
                                                      (False,)*nb_dim)()
                             for arg in xrange(0, num_args)]


                outputs = []
                for mode in [mode_with_gpu, mode_without_gpu]:
                    # test the optimization local_gpu_elemwise_0
                    f = theano.function(
                        symb_args, op_to_test(*symb_args),
                        mode=mode.excluding("local_gpu_elemwise_1"))
                    outputs.append(f(*args))
                    #assert that the test was done on the gpu.
                    if mode is mode_with_gpu:
                        assert any([isinstance(node.op, cuda.GpuElemwise)
                                    for node in f.maker.fgraph.apply_nodes])

                    # test the optimization local_gpu_elemwise_1
                    f = theano.function(
                        symb_args,
                        cuda.gpu_from_host(op_to_test(*symb_args)),
                        mode=mode.excluding("local_gpu_elemwise_0"))
                    out = f(*args)
                    #assert that the test was done on the gpu.
                    if mode is mode_with_gpu:
                        assert any([isinstance(node.op, cuda.GpuElemwise)
                                    for node in f.maker.fgraph.apply_nodes])
                    assert numpy.allclose(out, outputs[-1])

                results_gpu, results_cpu = outputs

                assert numpy.allclose(results_gpu, results_cpu)
Example #46
0
    def compile_args():
        """
        These args will be received by compile_str() in the preargs parameter.
        They will also be included in the "hard" part of the key module.
        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API. Most of the code was updated to
        # use the new API, but not all of it. When that is finished, enable
        # the following macro to assert that no new code uses the old API.
        flags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # NumPy 1.7 introduced the NPY_ARRAY_* names below; they did not
        # exist in older versions, so define them as aliases of the old
        # names when building against an older NumPy.
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
            flags.append("-D NPY_ARRAY_ALIGNED=NPY_ALIGNED")
            flags.append("-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
            flags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            flags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            flags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")

        # If the user didn't specify architecture flags, add them.
        if not any(['-arch=sm_' in f for f in flags]):
            # We compile cuda_ndarray.cu during import, and we should not add
            # device properties at that time, as the device is not selected
            # yet.
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda
            if hasattr(theano.sandbox, 'cuda'):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    _logger.warn(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now.")
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False)
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append('-arch=sm_' + str(p['major']) +
                             str(p['minor']))

        return flags
Example #47
0
    def create(self, input_storage=None, trustme=False):
        ret = super(Profile_Maker, self).create(input_storage, trustme)

        if (hasattr(theano, 'sandbox') and hasattr(theano.sandbox, 'cuda')
                and theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        # create a function-specific storage container for profiling info
        profile = ProfileStats(atexit_print=False)
        self.mode.profile_stats[ret] = profile
        ret.profile = profile

        #initialize the timers
        for i, node in enumerate(ret.maker.fgraph.toposort()):
            profile.apply_time[node] = 0.0

            # a thunk_group is a list of the thunks from each linker
            # corresponding to the i'th position in the toposort.
            assert len(ret.fn.thunk_groups[i]) == 1
            profile.apply_cimpl[node] = hasattr(ret.fn.thunk_groups[i][0],
                                                'cthunk')

        # Here we replace the linker function.
        # This ugliness makes WrapLinker (an object that *generates*
        # functions and is not function-specific)  work with ProfileStats
        # objects which are function-specific.

        #capture old fn in closure. This is important since new_fn is about to
        #take its place as ret.fn.
        ret_fn = ret.fn

        def new_fn():
            self.mode.apply_time = self.mode.profile_stats[ret].apply_time
            self.mode.variable_shape = self.mode.profile_stats[
                ret].variable_shape
            ret_fn()
            # delete the old apply_time variable
            # because it doesn't mean the same thing anymore.
            # This prevents old code from looking like it still works.
            del self.mode.apply_time
            del self.mode.variable_shape

        ret.fn = new_fn

        global run_cthunk
        if run_cthunk is None and any(profile.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk

        return ret
Example #48
0
def filter_nvcc_flags(s):
    assert isinstance(s, str)
    flags = [flag for flag in s.split(' ') if flag]
    if any([f for f in flags if not f.startswith("-")]):
        raise ValueError(
            "Theano nvcc.flags support only parameter/value pairs without"
            " space between them. e.g.: '--machine 64' is not supported,"
            " but '--machine=64' is supported. Please add the '=' symbol."
            " nvcc.flags value is '%s'" % s)
    return ' '.join(flags)
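
# Illustrative use of filter_nvcc_flags with made-up flag strings: extra
# whitespace is collapsed, and any token that does not start with '-' is
# rejected.
assert filter_nvcc_flags("-O3   --machine=64") == "-O3 --machine=64"
try:
    filter_nvcc_flags("--machine 64")    # '64' does not start with '-'
except ValueError:
    pass                                 # rejected as expected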
Example #49
0
def test_reshape():

    a = tcn.CudaNdarrayType((False,))()
    b = tcn.CudaNdarrayType((False, False))()
    c = T.reshape(a, [2, 3])

    #basic
    f = theano.function([a], c, mode=mode_with_gpu)
    fv = f(cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                    dtype='float32')))
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(fv == numpy.asarray([[0, 1, 2], [3, 4, 5]]))

    #test that it works without inplace operations
    a_val = cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                     dtype='float32'))
    a_val_copy = cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                          dtype='float32'))
    b_val = cuda_ndarray.CudaNdarray(theano._asarray([[0, 1, 2], [3, 4, 5]],
                                                     dtype='float32'))

    f_sub = theano.function([a, b], c - b, mode=mode_with_gpu)
    topo = f_sub.maker.fgraph.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    #test that it works with inplace operations
    a_val = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')

    f_sub = theano.function([a, b], c - b, mode=mode_with_gpu)
    topo = f_sub.maker.fgraph.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    # verify gradient
    def just_vals(v):
        return T.Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
    utt.verify_grad(just_vals, [a_val])
Example #50
0
def filter_nvcc_flags(s):
    assert isinstance(s, str)
    flags = [flag for flag in s.split(' ') if flag]
    if any([f for f in flags if not f.startswith("-")]):
        raise ValueError(
            "Theano nvcc.flags support only parameter/value pairs without"
            " space between them. e.g.: '--machine 64' is not supported,"
            " but '--machine=64' is supported. Please add the '=' symbol."
            " nvcc.flags value is '%s'" % s)
    return ' '.join(flags)
Example #51
0
def local_gpua_subtensor(node):
    x = node.inputs[0]
    if (x.owner and isinstance(x.owner.op, HostFromGpu)):
        gpu_x = x.owner.inputs[0]
        if (gpu_x.owner and isinstance(gpu_x.owner.op, GpuFromHost) and
                # And it is a shared var or an input of the graph.
                not gpu_x.owner.inputs[0].owner):
            if len(x.clients) == 1:
                if any([
                        n == 'output' or any([
                            isinstance(v.type, GpuArrayType)
                            for v in n.inputs + n.outputs
                        ]) for n, _ in node.outputs[0].clients
                ]):
                    return
                else:
                    return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
Example #52
0
def test_reshape():

    a = tcn.CudaNdarrayType((False,))()
    b = tcn.CudaNdarrayType((False, False))()
    c = T.reshape(a, [2, 3])

    #basic
    f = theano.function([a], c, mode=mode_with_gpu)
    fv = f(cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                    dtype='float32')))
    topo = f.maker.env.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(fv == numpy.asarray([[0, 1, 2], [3, 4, 5]]))

    #test that it works without inplace operations
    a_val = cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                     dtype='float32'))
    a_val_copy = cuda_ndarray.CudaNdarray(theano._asarray([0, 1, 2, 3, 4, 5],
                                                          dtype='float32'))
    b_val = cuda_ndarray.CudaNdarray(theano._asarray([[0, 1, 2], [3, 4, 5]],
                                                     dtype='float32'))

    f_sub = theano.function([a, b], c - b, mode=mode_with_gpu)
    topo = f_sub.maker.env.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    #test that it works with inplace operations
    a_val = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    a_val_copy = theano._asarray([0, 1, 2, 3, 4, 5], dtype='float32')
    b_val = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')

    f_sub = theano.function([a, b], c - b, mode=mode_with_gpu)
    topo = f_sub.maker.env.toposort()
    assert any([isinstance(node.op, B.GpuReshape) for node in topo])
    assert numpy.all(f_sub(a_val, b_val) == 0.0)
    assert numpy.all(numpy.asarray(a_val) == numpy.asarray(a_val_copy))

    # verify gradient
    def just_vals(v):
        return T.Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
    utt.verify_grad(just_vals, [a_val])
Example #53
0
    def multinomial(self,
                    size=None,
                    n=1,
                    pvals=None,
                    ndim=None,
                    dtype='int64',
                    nstreams=None):
        """
        Sample `n` (currently `n` needs to be 1) times from a multinomial
        distribution defined by probabilities pvals.

        Example : pvals = [[.98, .01, .01], [.01, .98, .01]] will
        probably result in [[1,0,0],[0,1,0]].

        .. note::
            - `size` and `ndim` are only there to keep the same signature as
              other methods such as uniform, binomial, normal, etc.
              TODO: adapt multinomial to take them into account.

            - Does not do any value checking on pvals, i.e. there is no
              check that the elements are non-negative, less than 1, or
              sum to 1. Passing pvals = [[-2., 2.]] will result in
              sampling [[0, 0]].
        """
        if pvals is None:
            raise TypeError("You have to specify pvals")
        pvals = as_tensor_variable(pvals)
        if size is not None:
            if any([isinstance(i, int) and i <= 0 for i in size]):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
                    size)

        if n == 1 and pvals.ndim == 2:
            if size is not None:
                raise ValueError(
                    "Provided a size argument to "
                    "MRG_RandomStreams.multinomial, which does not use "
                    "the size argument.")
            if ndim is not None:
                raise ValueError(
                    "Provided an ndim argument to "
                    "MRG_RandomStreams.multinomial, which does not use "
                    "the ndim argument.")
            ndim, size, bcast = raw_random._infer_ndim_bcast(
                ndim, size, pvals[:, 0])
            assert ndim == 1
            bcast = bcast + (pvals.type.broadcastable[-1], )
            unis = self.uniform(size=size, ndim=1, nstreams=nstreams)
            op = multinomial.MultinomialFromUniform(dtype)
            return op(pvals, unis)
        else:
            raise NotImplementedError(
                ("MRG_RandomStreams.multinomial only"
                 " implemented with n == 1 and pvals.ndim = 2"))
Example #54
0
def test__toposort():
    edges = {
        1: set((4, 6, 7)),
        2: set((4, 6, 7)),
        3: set((5, 7)),
        4: set((6, 7)),
        5: set((7, ))
    }
    order = _toposort(edges)
    assert not any(a in edges.get(b, ()) for i, a in enumerate(order)
                   for b in order[i:])
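
# A minimal Kahn's-algorithm sketch that satisfies the property tested above,
# assuming the same convention as the test's `edges` dict: edges[a] is the set
# of nodes that depend on a, so a must appear before every member of edges[a].
def _toposort_sketch(edges):
    nodes = set(edges)
    for deps in edges.values():
        nodes.update(deps)
    incoming = dict((n, 0) for n in nodes)   # unmet prerequisites per node
    for succs in edges.values():
        for n in succs:
            incoming[n] += 1
    ready = [n for n in nodes if incoming[n] == 0]
    order = []
    while ready:
        n = ready.pop()
        order.append(n)
        for m in edges.get(n, ()):
            incoming[m] -= 1
            if incoming[m] == 0:
                ready.append(m)
    if len(order) != len(nodes):
        raise ValueError("graph contains a cycle")
    return order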
Example #55
0
    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls,
                           excluding=None):
        """This tests the infer_shape method only"""
        mode = self.mode
        if excluding:
            mode = mode.excluding(*excluding)

        outputs_function = theano.function(inputs, outputs, mode=mode)
        shapes_function = theano.function(inputs, [o.shape for o in outputs],
                                          mode=mode)
        #theano.printing.debugprint(shapes_function)
        # Check that the Op is removed from the compiled function.
        topo_shape = shapes_function.maker.fgraph.toposort()
        assert not any(isinstance(t.op, cls) for t in topo_shape)
        topo_out = outputs_function.maker.fgraph.toposort()
        assert any(isinstance(t.op, cls) for t in topo_out)
        # Check that the shape produced agrees with the actual shape.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            assert numpy.all(out.shape == shape)
Example #56
0
def test_nvidia_driver3():
    """ Test that the gpu device is initialized by theano when
        we build a function with gpu op.

        The driver should always be tested during theano initialization
        of the gpu device
    """
    var = cuda.fvector()
    f = theano.function([var], var + 1, mode=mode_with_gpu, profile=False)
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
    assert theano.sandbox.cuda.use.device_number is not None
Example #57
0
def get_flags(*types):
    def get_dtype(t):
        if isinstance(t, (str, unicode)):
            return numpy.dtype(t)
        elif isinstance(t, Type):
            return t.dtype
        elif isinstance(t, Variable):
            return t.type.dtype
        else:
            raise TypeError("can't get a dtype from %s" % (type(t),))
    dtypes = [get_dtype(t) for t in types]
    flags = dict(cluda=True)
    if any(d == numpy.float64 for d in dtypes):
        flags['have_double'] = True
    if any(d.itemsize < 4 for d in dtypes):
        flags['have_small'] = True
    if any(d.kind == 'c' for d in dtypes):
        flags['have_complex'] = True
    if any(d == numpy.float16 for d in dtypes):
        flags['have_half'] = True
    return flags
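
# Illustrative call with plain dtype strings (the Type/Variable branches above
# resolve to the same dtypes; assumes numpy is already imported):
flags = get_flags('float64', 'int8', 'complex64')
# float64 -> have_double, int8 (itemsize 1) -> have_small, complex64 -> have_complex;
# there is no float16 argument, so have_half is left unset.
assert flags == {'cluda': True, 'have_double': True,
                 'have_small': True, 'have_complex': True}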
Example #58
0
def test_pooling_opt():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    f = theano.function(
        [x],
        max_pool_2d(x, ds=(2, 2), ignore_border=True),
        mode=mode_with_gpu)

    assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
                for n in f.maker.fgraph.toposort()])

    f = theano.function(
        [x],
        T.grad(max_pool_2d(x, ds=(2, 2), ignore_border=True).sum(), x),
        mode=mode_with_gpu.including("cudnn"))

    assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
                for n in f.maker.fgraph.toposort()])