def test_gpu_opt():
    """Check that the multinomial op is replaced by its GPU version.

    The graph is built so that the output of MultinomialFromUniform is
    moved to the GPU; the compiled function must then contain a
    GpuMultinomialFromUniform node. Both a matrix and a row input are
    exercised.
    """
    if not cuda.cuda_available:
        # cuda_ndarray is missing, so there is nothing to test.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    def check_lifted(probs, unis, n_rows):
        # Build the CPU op and transfer its output to the GPU.
        sample = multinomial.MultinomialFromUniform('auto')(probs, unis)
        assert sample.dtype == 'float32', sample.dtype
        sample_gpu = cuda.gpu_from_host(sample)

        fn = function([probs, unis], sample_gpu,
                      allow_input_downcast=True, mode=get_mode(True))
        assert any(
            type(apply_node.op) is multinomial.GpuMultinomialFromUniform
            for apply_node in fn.maker.fgraph.toposort())

        # Run the compiled function once on row-normalised values.
        raw = numpy.arange(n_rows * 4,
                           dtype='float32').reshape((n_rows, 4)) + 0.1
        pvals = raw / raw.sum(axis=1)[:, None]
        uvals = numpy.ones_like(pvals[:, 0]) * 0.5
        fn(pvals, uvals)

    # Case where the op is put on the gpu because its output is moved
    # to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    check_lifted(p, u, 10000)

    # Same check with a row; it was failing in the past.
    r = tensor.frow()
    check_lifted(r, u, 1)
def test_default_conv():
    """Check which GPU convolution op ends up in the compiled graph.

    With the default GPU mode the graph must hold a GpuDnnConv when
    cuDNN is available and a GpuCorrMM otherwise. Once the dnn and gemm
    convolution optimizations are excluded, it must hold a GpuConv.
    """
    img = theano.tensor.ftensor4()
    fil = theano.tensor.ftensor4()
    c = theano.tensor.nnet.conv2d(img, fil)

    def compiled_ops(mode):
        # Compile the convolution and list the ops of the result.
        fn = theano.function([img, fil], c, mode=mode)
        return [apply_node.op for apply_node in fn.maker.fgraph.apply_nodes]

    ops = compiled_ops(theano_mode)
    if cuda.dnn.dnn_available():
        assert any(isinstance(op, GpuDnnConv) for op in ops)
    else:
        assert any(isinstance(op, cuda.blas.GpuCorrMM) for op in ops)

    # Both spellings of the optimization names must fall back to GpuConv.
    for excluded in (('local_conv_dnn', 'local_conv_gemm'),
                     ('conv_dnn', 'conv_gemm')):
        ops = compiled_ops(theano_mode.excluding(*excluded))
        assert any(isinstance(op, cuda.blas.GpuConv) for op in ops)
Beispiel #3
0
    def test_log1msigm_to_softplus(self):
        """Check that log(1 - sigmoid(x)) compiles to a softplus graph.

        The plain expression, a flattened variant and a reshaped variant
        are compiled with ``self.m``; each compiled function is also
        called once on random data.
        """
        x = T.matrix()
        softplus_cls = theano.tensor.nnet.sigm.ScalarSoftplus

        def compile_topo(expr):
            # Return the compiled function with its sorted apply nodes.
            fn = theano.function([x], expr, mode=self.m)
            return fn, fn.maker.fgraph.toposort()

        def run(fn):
            fn(numpy.random.rand(54, 11).astype(config.floatX))

        # Plain expression: a softplus followed by a negation.
        fn, nodes = compile_topo(T.log(1 - sigmoid(x)))
        assert len(nodes) == 2
        assert isinstance(nodes[0].op.scalar_op, softplus_cls)
        assert isinstance(nodes[1].op.scalar_op, theano.scalar.Neg)
        run(fn)

        # Same test with a flatten
        fn, nodes = compile_topo(T.log(1 - T.flatten(sigmoid(x))))
        assert len(nodes) == 3
        assert isinstance(nodes[0].op, T.Flatten)
        assert isinstance(nodes[1].op.scalar_op, softplus_cls)
        assert isinstance(nodes[2].op.scalar_op, theano.scalar.Neg)
        run(fn)

        # Same test with a reshape; the node count is not checked here.
        fn, nodes = compile_topo(T.log(1 - sigmoid(x).reshape([x.size])))
        assert any(isinstance(apply_node.op, T.Reshape)
                   for apply_node in nodes)
        assert any(isinstance(getattr(apply_node.op, 'scalar_op', None),
                              softplus_cls)
                   for apply_node in nodes)
        run(fn)
Beispiel #4
0
def test_local_sampling_dot_csr():
    """Check the local_sampling_dot_csr optimization on a csr input.

    When blas ldflags are configured, no SamplingDot node may remain in
    the compiled graph; otherwise no SamplingDotCSR node may be
    inserted.
    """
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    default_mode = theano.compile.mode.get_default_mode()
    mode = default_mode.including("specialize", "local_sampling_dot_csr")

    # Not implemented for other formats than csr.
    for sp_format in ['csr']:
        dense_x = tensor.matrix()
        dense_y = tensor.matrix()
        pattern = getattr(theano.sparse, sp_format + '_matrix')()
        inputs = [dense_x, dense_y, pattern]

        f = theano.function(inputs, sparse.sampling_dot(*inputs), mode=mode)
        nodes = f.maker.fgraph.toposort()

        if theano.config.blas.ldflags:
            forbidden = sparse.SamplingDot
        else:
            # SamplingDotCSR's C implementation needs blas, so it should
            # not be inserted
            forbidden = sparse.opt.SamplingDotCSR
        assert not any(isinstance(apply_node.op, forbidden)
                       for apply_node in nodes)
Beispiel #5
0
def test_default_conv():
    """Verify that the expected GPU convolution op is introduced.

    The default GPU mode must yield GpuDnnConv if cuDNN is available,
    GpuCorrMM if not. Excluding the dnn and gemm convolution
    optimizations must yield GpuConv.
    """
    img = theano.tensor.ftensor4()
    fil = theano.tensor.ftensor4()
    c = theano.tensor.nnet.conv2d(img, fil)

    def uses(fn, op_class):
        # True when some apply node of `fn` has an op of `op_class`.
        for apply_node in fn.maker.fgraph.apply_nodes:
            if isinstance(apply_node.op, op_class):
                return True
        return False

    f = theano.function([img, fil], c, mode=theano_mode)
    if cuda.dnn.dnn_available():
        assert uses(f, GpuDnnConv)
    else:
        assert uses(f, cuda.blas.GpuCorrMM)

    mode = theano_mode.excluding('local_conv_dnn', 'local_conv_gemm')
    f = theano.function([img, fil], c, mode=mode)
    assert uses(f, cuda.blas.GpuConv)

    mode = theano_mode.excluding('conv_dnn', 'conv_gemm')
    f = theano.function([img, fil], c, mode=mode)
    assert uses(f, cuda.blas.GpuConv)
Beispiel #6
0
def test_pooling_opt():
    """Check that max pooling and its gradient use the cuDNN ops.

    Skipped when cuDNN is not available.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    # Forward pass: a GpuDnnPool node must be present.
    pooled = max_pool_2d(x, ds=(2, 2), ignore_border=True)
    f = theano.function([x], pooled, mode=mode_with_gpu)
    assert any(isinstance(apply_node.op, cuda.dnn.GpuDnnPool)
               for apply_node in f.maker.fgraph.toposort())

    # Gradient: a GpuDnnPoolGrad node must be present.
    cost = max_pool_2d(x, ds=(2, 2), ignore_border=True).sum()
    f = theano.function([x], T.grad(cost, x),
                        mode=mode_with_gpu.including("cudnn"))
    assert any(isinstance(apply_node.op, cuda.dnn.GpuDnnPoolGrad)
               for apply_node in f.maker.fgraph.toposort())
Beispiel #7
0
    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls,
                           excluding=None, warn=True, check_topo=True):
        """This tests the infer_shape method only.

        When testing with input values with shapes that take the same
        value over different dimensions (for instance, a square
        matrix, or a tensor3 with shape (n, n, n), or (m, n, m)), it
        is not possible to detect if the output shape was computed
        correctly, or if some shapes with the same value have been
        mixed up. For instance, if the infer_shape uses the width of a
        matrix instead of its height, then testing with only square
        matrices will not detect the problem. If warn=True, we emit a
        warning when testing with such values.

        :param inputs: symbolic inputs given to ``theano.function``.
        :param outputs: symbolic outputs; their values and their
            ``.shape`` are compiled into two separate functions.
        :param numeric_inputs: values the compiled functions are called
            with, in the same order as ``inputs``.
        :param cls: Op class that must appear in the graph computing the
            outputs.
        :param excluding: optional names passed to ``mode.excluding``.
        :param warn: If True, warn when an input shape has repeated
            values on its non-broadcastable dimensions.
        :param check_topo: If True, we check that the Op was removed
            from the shape graph. False is useful to test a
            not-implemented case.

        """
        mode = self.mode
        if excluding:
            mode = mode.excluding(*excluding)
        if warn:
            for var, inp in zip(inputs, numeric_inputs):
                if isinstance(inp, (int, float, list, tuple)):
                    inp = var.type.filter(inp)
                if not hasattr(inp, "shape"):
                    continue
                # remove broadcasted dims as it is sure they can't be
                # changed to prevent the same dim problem.
                if hasattr(var.type, "broadcastable"):
                    shp = [inp.shape[i] for i in range(inp.ndim)
                           if not var.type.broadcastable[i]]
                else:
                    shp = inp.shape
                if len(set(shp)) != len(shp):
                    # Logger.warn is a deprecated alias; use warning().
                    _logger.warning(
                        "While testing the shape inference, we received an"
                        " input with a shape that has some repeated values: %s"
                        ", like a square matrix. This makes it impossible to"
                        " check if the values for these dimensions have been"
                        " correctly used, or if they have been mixed up.",
                        str(inp.shape))
                    # One warning is enough.
                    break

        outputs_function = theano.function(inputs, outputs, mode=mode)
        shapes_function = theano.function(inputs, [o.shape for o in outputs],
                                          mode=mode)
        # Check that the Op is removed from the compiled shape function.
        if check_topo:
            topo_shape = shapes_function.maker.fgraph.toposort()
            assert not any(isinstance(t.op, cls) for t in topo_shape)
        # The Op must still be present in the function computing the values.
        topo_out = outputs_function.maker.fgraph.toposort()
        assert any(isinstance(t.op, cls) for t in topo_out)
        # Check that the shape produced agrees with the actual shape.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            assert numpy.all(out.shape == shape), (out.shape, shape)
Beispiel #8
0
def test_pooling_opt():
    """Check that cuDNN pooling ops are used for max_pool_2d.

    The forward graph must contain GpuDnnPool and the gradient graph
    GpuDnnPoolGrad. The test is skipped if cuDNN is not available.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    def has_op(fn, op_class):
        # True when a node of the compiled graph carries `op_class`.
        return any(isinstance(apply_node.op, op_class)
                   for apply_node in fn.maker.fgraph.toposort())

    forward = max_pool_2d(x, ds=(2, 2), ignore_border=True)
    f = theano.function([x], forward, mode=mode_with_gpu)
    assert has_op(f, cuda.dnn.GpuDnnPool)

    gradient = T.grad(
        max_pool_2d(x, ds=(2, 2), ignore_border=True).sum(), x)
    f = theano.function([x], gradient,
                        mode=mode_with_gpu.including("cudnn"))
    assert has_op(f, cuda.dnn.GpuDnnPoolGrad)
Beispiel #9
0
def test_dnn_tag():
    """
    Check the behaviour of the "cudnn" tag.

    Compiling with the tag must raise when cudnn is not available and
    must produce a GpuDnnPool node when it is.
    """
    x = T.ftensor4()
    saved_on_opt_error = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    capture = logging.StreamHandler(StringIO())
    logging.getLogger("theano.compile.tests.test_dnn").addHandler(capture)
    # Silence the default handler while warnings are provoked on purpose.
    logging.getLogger("theano").removeHandler(theano.logging_default_handler)
    raised = False
    try:
        pooled = max_pool_2d(x, ds=(2, 2), ignore_border=True)
        f = theano.function([x], pooled,
                            mode=mode_with_gpu.including("cudnn"))
    except (AssertionError, RuntimeError):
        assert not cuda.dnn.dnn_available()
        raised = True
    finally:
        # Restore the configuration and the logging handlers.
        theano.config.on_opt_error = saved_on_opt_error
        logging.getLogger(
            "theano.compile.tests.test_dnn").removeHandler(capture)
        logging.getLogger("theano").addHandler(theano.logging_default_handler)

    if raised:
        return
    assert cuda.dnn.dnn_available()
    assert any(isinstance(apply_node.op, cuda.dnn.GpuDnnPool)
               for apply_node in f.maker.fgraph.toposort())
Beispiel #10
0
    def test_neibs_manual(self):
        """Compare images2neibs against hand-written neighbourhoods.

        A (2, 3, 4, 4) arange tensor is cut in (2, 2) patches for each
        dtype and border mode; neibs2images must then rebuild the
        original images.
        """
        shape = (2, 3, 4, 4)
        # One line per 4x4 image, four (2, 2) patches each.
        expected = [
            [0, 1, 4, 5], [2, 3, 6, 7], [8, 9, 12, 13], [10, 11, 14, 15],
            [16, 17, 20, 21], [18, 19, 22, 23],
            [24, 25, 28, 29], [26, 27, 30, 31],
            [32, 33, 36, 37], [34, 35, 38, 39],
            [40, 41, 44, 45], [42, 43, 46, 47],
            [48, 49, 52, 53], [50, 51, 54, 55],
            [56, 57, 60, 61], [58, 59, 62, 63],
            [64, 65, 68, 69], [66, 67, 70, 71],
            [72, 73, 76, 77], [74, 75, 78, 79],
            [80, 81, 84, 85], [82, 83, 86, 87],
            [88, 89, 92, 93], [90, 91, 94, 95],
        ]
        for dtype in self.dtypes:
            flat = numpy.arange(numpy.prod(shape), dtype=dtype)
            images = shared(flat.reshape(shape))
            neib_shape = T.as_tensor_variable((2, 2))

            for border in ['valid', 'ignore_borders']:
                patches = images2neibs(images, neib_shape, mode=border)
                f = function([], patches, mode=self.mode)
                assert any(isinstance(apply_node.op, self.op)
                           for apply_node in f.maker.fgraph.toposort())

                neibs = f()
                assert numpy.allclose(neibs, expected)

                # Rebuilding the images must give back the input.
                rebuilt = neibs2images(neibs, neib_shape, images.shape)
                g = function([], rebuilt, mode=self.mode)
                assert numpy.allclose(images.get_value(borrow=True), g())
Beispiel #11
0
    def test_neibs(self):
        """Round-trip images through images2neibs and neibs2images.

        Several image and patch shapes are tried for each border mode
        and dtype; the rebuilt images must match the originals.
        """
        cases = [((10, 7, 18, 18), (2, 2)),
                 ((10, 7, 6, 18), (3, 2)),
                 ((5, 7, 66, 66), (33, 33)),
                 ((5, 7, 68, 66), (34, 33))]
        for shape, pshape in cases:
            for border in ['valid', 'ignore_borders']:
                for dtype in self.dtypes:
                    flat = numpy.arange(numpy.prod(shape), dtype=dtype)
                    images = shared(flat.reshape(shape))
                    neib_shape = T.as_tensor_variable(pshape)

                    patches = images2neibs(images, neib_shape, mode=border)
                    f = function([], patches, mode=self.mode)
                    neibs = f()

                    rebuilt = neibs2images(neibs, neib_shape, images.shape)
                    g = function([], rebuilt, mode=self.mode)

                    # The tested op must be part of the forward graph.
                    assert any(isinstance(apply_node.op, self.op)
                               for apply_node in f.maker.fgraph.toposort())

                    assert numpy.allclose(images.get_value(borrow=True), g())
Beispiel #12
0
        def local_opt(node):
            """Try to replace ``node`` by the result of ``maker``.

            Returns nothing when ``cuda_only`` is set and the device name
            does not start with 'cuda', a list of replacement variables
            when a replacement is made, and False otherwise.
            """
            device_name = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not device_name.startswith('cuda'):
                return

            if type(node.op) not in OP:
                return False

            # Either one of our inputs is on the gpu ...
            lift = any(inp.owner and inp.owner.op == host_from_gpu
                       for inp in node.inputs)
            if not lift:
                # ... or all of our clients are on the gpu.
                lift = all(client != 'output' and client.op == gpu_from_host
                           for client, idx in node.outputs[0].clients)
            if not lift:
                return False

            new_op = maker(node)
            # This is needed as sometimes new_op inherit from OP.
            if not (new_op and new_op != node.op):
                return False

            if isinstance(new_op, theano.Op):
                results = new_op(*node.inputs, return_list=True)
                return [safe_to_cpu(out) for out in results]
            if isinstance(new_op, (tuple, list)):
                return [safe_to_cpu(out) for out in new_op]
            # Otherwise suppose it is a variable on the GPU.
            return [host_from_gpu(new_op)]
Beispiel #13
0
    def body(mode, gpu):
        """Compile and run the multinomial op on a large input.

        With ``gpu`` set, the compiled graph must contain the GPU
        version of the op. The output shape, dtype and values are then
        checked.
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = multinomial.MultinomialFromUniform('auto')(p, u)
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)
        if gpu:
            assert any(
                type(apply_node.op) is multinomial.GpuMultinomialFromUniform
                for apply_node in f.maker.fgraph.toposort())

        raw = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))
        pval = raw + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        mval = f(pval, uval)

        assert mval.shape == pval.shape
        # The expected output dtype depends on the cast policy.
        policy = config.cast_policy
        if policy == 'custom':
            expected_dtype = pval.dtype
        elif policy == 'numpy+floatX':
            expected_dtype = config.floatX
        elif policy == 'numpy':
            expected_dtype = 'float64'
        else:
            raise NotImplementedError(config.cast_policy)
        assert mval.dtype == expected_dtype

        assert numpy.allclose(mval.sum(axis=1), 2)
        # Broadcast the expected row over all rows.
        expected = numpy.asarray([0, 0, 2, 0]) + 0 * pval
        assert numpy.allclose(mval, expected)
Beispiel #14
0
    def body(mode, gpu):
        """Compile ``m * 2`` and check draws on small hand-made inputs.

        ``p``, ``u`` and ``m`` come from the enclosing scope. With
        ``gpu`` set, the GPU version of the op must be in the graph.
        """
        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)
        if gpu:
            assert any(
                type(apply_node.op) is multinomial.GpuMultinomialFromUniform
                for apply_node in f.maker.fgraph.toposort())

        # test that both first and second samples can be drawn
        drawn = f([[1, 0], [0, 1]], [.1, .1])
        assert numpy.allclose(drawn, [[2, 0], [0, 2]])

        # test that both second labels can be drawn
        drawn = f([[.2, .8], [.3, .7]], [.31, .31])
        assert numpy.allclose(drawn, [[0, 2], [0, 2]]), drawn

        # test that both first labels can be drawn
        drawn = f([[.2, .8], [.3, .7]], [.21, .21])
        assert numpy.allclose(drawn, [[0, 2], [2, 0]]), drawn

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        drawn = f([[.2, .8]], [.25])
        assert numpy.allclose(drawn, [[0, 2]]), drawn
Beispiel #15
0
def local_gpu_multinomial(node):
    """Replace MultinomialFromUniform by GpuMultinomialFromUniform.

    Two patterns are handled, both only when inputs and output are
    float32:

    * ``node`` is the multinomial op and one of its inputs comes from a
      HostFromGpu;
    * ``node`` is a GpuFromHost whose input is produced by the
      multinomial op.

    Returns a one-element list holding the replacement, or nothing when
    no pattern applies.
    """
    def all_float32(*variables):
        return all(var.dtype == 'float32' for var in variables)

    if type(node.op) is MultinomialFromUniform:
        p, u = node.inputs
        m, = node.outputs
        if all_float32(p, u, m):
            fed_from_gpu = any(
                inp.owner and
                isinstance(inp.owner.op, theano.sandbox.cuda.HostFromGpu)
                for inp in node.inputs)
            if fed_from_gpu:
                gpu_op = GpuMultinomialFromUniform(node.op.odtype)
                gpu_inputs = [gpu_from_host(inp) for inp in node.inputs]
                return [host_from_gpu(gpu_op(*gpu_inputs)).T]

    if isinstance(node.op, theano.sandbox.cuda.GpuFromHost):
        multi = node.inputs[0].owner
        if multi and type(multi.op) is MultinomialFromUniform:
            p, u = multi.inputs
            m, = multi.outputs
            if all_float32(p, u, m):
                gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
                gpu_inputs = [gpu_from_host(inp) for inp in multi.inputs]
                transposed = gpu_op(*gpu_inputs).T
                # The dimshuffle is on the cpu, but will be moved to the
                # gpu by an opt.
                return [gpu_from_host(transposed)]
Beispiel #16
0
def list_of_nodes(inputs, outputs):
    """ Return the apply nodes of the graph between inputs and outputs """
    def expand(apply_node):
        # Owners of the node's inputs, skipping variables without an
        # owner and owners that produce one of `inputs`.
        return [
            var.owner for var in apply_node.inputs
            if var.owner and not any(i in var.owner.outputs for i in inputs)
        ]

    start = deque([out.owner for out in outputs])
    return stack_search(start, expand)
Beispiel #17
0
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, apply_cimpl, message, outputs_size,
                    other_time):
    """Print the overhead of the Scan ops found in a profile.

    Only ``apply_time`` is read here; the other parameters are accepted
    but not used by this function. ``apply_time`` is iterated with
    ``.items()``: each key is unpacked as a pair whose second element is
    an apply node, and each value is the time reported for that node.

    Nothing is printed unless at least one Scan node has a time > 0.

    NOTE(review): this block uses Python 2 ``print`` statements.
    """
    # Scan overhead profile
    if any([isinstance(node.op, Scan) and v > 0 for (_, node), v in
            apply_time.items()]):
        print
        print 'Scan overhead:'
        print ('<Scan op time(s)> <sub scan fct time(s)> <sub scan op '
               'time(s)> <sub scan fct time(% scan op time)> <sub scan '
               'op time(% scan op time)> <node>')
        # Running totals over all Scan nodes with a time > 0.
        total_super_scan_time = 0
        total_scan_fct_time = 0
        total_scan_op_time = 0
        for (_, node), v in apply_time.items():
            if isinstance(node.op, Scan):
                if v > 0:
                    # Timings recorded on the Scan op's mode instance.
                    scan_fct_time = node.op.mode_instance.fn_time
                    scan_op_time = node.op.mode_instance.local_time
                    total_super_scan_time += v
                    total_scan_fct_time += scan_fct_time
                    total_scan_op_time += scan_op_time
                    print '    %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
                        v, scan_fct_time, scan_op_time,
                        scan_fct_time / v * 100, scan_op_time / v * 100), node
                else:
                    # Ratios cannot be computed for a zero time.
                    print (' The node took 0s, so we can not compute the '
                           'overhead'), node
        # The enclosing `if` guarantees a Scan node with v > 0 was added,
        # so the divisor below is not the initial 0.
        print '    total %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
            total_super_scan_time, total_scan_fct_time, total_scan_op_time,
            total_scan_fct_time / total_super_scan_time * 100,
            total_scan_op_time / total_super_scan_time * 100)
Beispiel #18
0
def list_of_nodes(inputs, outputs):
    """ Return the apply nodes of the graph between inputs and outputs """
    def parents(apply_node):
        # Collect the owners of the node's inputs; variables without an
        # owner and owners producing one of `inputs` are left out.
        found = []
        for var in apply_node.inputs:
            if not var.owner:
                continue
            if any(i in var.owner.outputs for i in inputs):
                continue
            found.append(var.owner)
        return found

    return stack_search(deque([out.owner for out in outputs]), parents)
Beispiel #19
0
    def body(mode, gpu):
        """Run the multinomial op on a 10000 x 4 input and check it.

        When ``gpu`` is true the GPU op must be in the compiled graph.
        Shape, dtype (according to the cast policy) and values of the
        result are verified.
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = multinomial.MultinomialFromUniform('auto')(p, u)
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode)
        if gpu:
            nodes = f.maker.fgraph.toposort()
            assert any(
                type(apply_node.op) is multinomial.GpuMultinomialFromUniform
                for apply_node in nodes)

        pval = numpy.arange(10000 * 4, dtype='float32')
        pval = pval.reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        mval = f(pval, uval)

        assert mval.shape == pval.shape
        # Pick the dtype the active cast policy should produce.
        if config.cast_policy == 'custom':
            wanted = pval.dtype
        elif config.cast_policy == 'numpy+floatX':
            wanted = config.floatX
        elif config.cast_policy == 'numpy':
            wanted = 'float64'
        else:
            raise NotImplementedError(config.cast_policy)
        assert mval.dtype == wanted

        assert numpy.allclose(mval.sum(axis=1), 2)
        # The same row is expected everywhere; broadcast it over all rows.
        reference = numpy.asarray([0, 0, 2, 0]) + 0 * pval
        assert numpy.allclose(mval, reference)
    def test_logical_shapes(self):
        """Compile and run a full conv2d with logical shapes on the GPU.

        For strides 1 to 3, the compiled graph must contain a GpuConv
        node and the function must run on random float32 data.
        """
        seed_rng()
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)
        # Kernel shape with its first two dimensions swapped.
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])

        for stride in range(1, 4):
            a = tensor.ftensor4()
            A = tensor.ftensor4()

            # Need to transpose first two dimensions of kernel, and reverse
            # index kernel image dims (for correlation)
            kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

            # The last two feature dimensions are scaled by the stride.
            featshp_logical = featshp[:2] + (featshp[2] * stride,
                                             featshp[3] * stride)
            image_estimate = tensor.nnet.conv2d(
                a, kernel_rotated,
                border_mode='full',
                image_shape=featshp,
                filter_shape=kshp_rotated,
                imshp_logical=featshp_logical[1:],
                kshp_logical=kshp[2:])

            func = theano.function([a, A], image_estimate, mode=mode_with_gpu)
            assert any(isinstance(apply_node.op, GpuConv)
                       for apply_node in func.maker.fgraph.toposort())

            a_in = numpy.random.randn(*featshp).astype("float32")
            A_in = numpy.random.randn(*kshp).astype("float32")

            func(a_in, A_in)
Beispiel #21
0
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, apply_cimpl, message, outputs_size,
                    other_time):
    """Print a Scan overhead report built from per-node timings.

    ``apply_time`` is the only parameter read by this function; it is
    iterated with ``.items()``, each key being unpacked as a pair whose
    second element is an apply node and each value being the time
    associated with that node. The remaining parameters are unused here.

    The report is printed only when at least one Scan node has a
    time > 0.

    NOTE(review): this block uses Python 2 ``print`` statements.
    """
    # Scan overhead profile
    if any([isinstance(node.op, Scan) and v > 0 for (_, node), v in
            apply_time.items()]):
        print
        print 'Scan overhead:'
        print ('<Scan op time(s)> <sub scan fct time(s)> <sub scan op '
               'time(s)> <sub scan fct time(% scan op time)> <sub scan '
               'op time(% scan op time)> <node>')
        # Accumulators for the final "total" line.
        total_super_scan_time = 0
        total_scan_fct_time = 0
        total_scan_op_time = 0
        for (_, node), v in apply_time.items():
            if isinstance(node.op, Scan):
                if v > 0:
                    # Timings are read from the Scan op's mode instance.
                    scan_fct_time = node.op.mode_instance.fn_time
                    scan_op_time = node.op.mode_instance.local_time
                    total_super_scan_time += v
                    total_scan_fct_time += scan_fct_time
                    total_scan_op_time += scan_op_time
                    print '    %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
                        v, scan_fct_time, scan_op_time,
                        scan_fct_time / v * 100, scan_op_time / v * 100), node
                else:
                    # A zero time would make the percentages undefined.
                    print (' The node took 0s, so we can not compute the '
                           'overhead'), node
        # At least one positive time was accumulated (see the guard
        # above), so the total used as divisor is not the initial 0.
        print '    total %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%' % (
            total_super_scan_time, total_scan_fct_time, total_scan_op_time,
            total_scan_fct_time / total_super_scan_time * 100,
            total_scan_op_time / total_super_scan_time * 100)
Beispiel #22
0
    def test_neibs(self):
        """Check that neibs2images inverts images2neibs.

        Runs over several image/patch shapes, both border modes and
        every dtype in ``self.dtypes``.
        """
        shape_pairs = [((10, 7, 18, 18), (2, 2)),
                       ((10, 7, 6, 18), (3, 2)),
                       ((5, 7, 66, 66), (33, 33)),
                       ((5, 7, 68, 66), (34, 33))]
        borders = ['valid', 'ignore_borders']

        for shape, pshape in shape_pairs:
            for border in borders:
                for dtype in self.dtypes:
                    values = numpy.arange(numpy.prod(shape), dtype=dtype)
                    images = shared(values.reshape(shape))
                    neib_shape = T.as_tensor_variable(pshape)

                    # Forward: cut the images in patches.
                    f = function(
                        [],
                        images2neibs(images, neib_shape, mode=border),
                        mode=self.mode)
                    neibs = f()

                    # Backward: rebuild the images from the patches.
                    g = function(
                        [],
                        neibs2images(neibs, neib_shape, images.shape),
                        mode=self.mode)

                    forward_nodes = f.maker.fgraph.toposort()
                    assert any(isinstance(apply_node.op, self.op)
                               for apply_node in forward_nodes)

                    assert numpy.allclose(images.get_value(borrow=True), g())
    def test_logical_shapes(self):
        """Run a full-mode conv2d with logical shapes through GpuConv.

        The graph is rebuilt for each stride in 1..3; it must contain a
        GpuConv node and execute on random float32 inputs.
        """
        seed_rng()
        for stride in range(1, 4):
            kshp = (10, 2, 10, 10)
            featshp = (3, 10, 11, 11)

            a = tensor.ftensor4()
            A = tensor.ftensor4()

            # Need to transpose first two dimensions of kernel, and reverse
            # index kernel image dims (for correlation)
            kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

            f0, f1, f2, f3 = featshp
            featshp_logical = (f0, f1, f2 * stride, f3 * stride)
            k0, k1, k2, k3 = kshp
            kshp_rotated = (k1, k0, k2, k3)

            conv_kwargs = dict(border_mode='full',
                               image_shape=featshp,
                               filter_shape=kshp_rotated,
                               imshp_logical=featshp_logical[1:],
                               kshp_logical=kshp[2:])
            image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                                **conv_kwargs)

            func = theano.function([a, A], image_estimate, mode=mode_with_gpu)
            nodes = func.maker.fgraph.toposort()
            assert any(isinstance(apply_node.op, GpuConv)
                       for apply_node in nodes)

            a_in = numpy.random.randn(*featshp).astype("float32")
            A_in = numpy.random.randn(*kshp).astype("float32")

            func(a_in, A_in)
Beispiel #24
0
    def cmp(a_shp, b_shp):
        """Check dot(a, b) + c against numpy for the given shapes.

        The compiled function returns ``dot(a, b2) + c`` and updates the
        shared ``a`` with ``dot(a, b) + c``; its graph must contain
        ``gpu_gemm_no_inplace``.
        """
        a0 = my_rand(*a_shp)
        a = tcn.shared_constructor(a0, 'a')
        cval = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(cval.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        returned = tensor.dot(a, b2) + c
        new_a = tensor.dot(a, b) + c
        f = pfunc([b, b2], [returned], updates=[(a, new_a)],
                  mode=mode_with_gpu)

        assert any(apply_node.op == tcn.blas.gpu_gemm_no_inplace
                   for apply_node in f.maker.fgraph.toposort())
        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a0, bval) + cval, a.get_value())
        assert numpy.allclose(numpy.dot(a0, bval2) + cval, rval)

        # Run again with `a` reset to a0 and then replaced by a view of
        # its internal value reversed along both dims (strided in both).
        a.set_value(a0)
        internal = a.get_value(borrow=True, return_internal_type=True)
        a.set_value(internal[::-1, ::-1], borrow=True)
        f(bval, bval2)
Beispiel #25
0
def _toposort(edges):
    """ Topological sort following Kahn's algorithm [1] - O(nodes + edges)

    inputs:
        edges - a dict of the form {a: {b, c}} where b and c depend on a
    outputs:
        an ordered list of nodes that satisfies the dependencies of edges

    Raises ValueError when the input contains a cycle.

    >>> _toposort({1: {2, 3}, 2: (3, )})
    [1, 2, 3]

    Closely follows the wikipedia page [2]

    [1] Kahn, Arthur B. (1962), "Topological sorting of large networks",
    Communications of the ACM
    [2] http://en.wikipedia.org/wiki/Toposort#Algorithms
    """
    # For each node, the set of nodes it still waits for.
    waiting_on = dict((node, set(preds))
                      for node, preds in reverse_dict(edges).items())
    # Nodes with no incoming edge can be emitted right away.
    ready = set(node for node in edges if node not in waiting_on)
    ordered = []

    while ready:
        current = ready.pop()
        ordered.append(current)
        for child in edges.get(current, ()):
            unmet = waiting_on[child]
            assert current in unmet
            unmet.remove(current)
            if not unmet:
                ready.add(child)

    # Any node left with unmet dependencies means there is a cycle.
    if any(waiting_on.get(node, None) for node in edges):
        raise ValueError("Input has cycles")
    return ordered
Beispiel #26
0
def test_dnn_tag():
    """
    Check the "cudnn" optimizer tag: compiling with it must fail when cudnn
    is not available, and must produce a GpuDnnPool node when it is.
    """
    x = T.ftensor4()
    saved_on_opt_error = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    sio = StringIO()
    handler = logging.StreamHandler(sio)
    dnn_logger = logging.getLogger('theano.compile.tests.test_dnn')
    theano_logger = logging.getLogger('theano')
    dnn_logger.addHandler(handler)
    # Silence the original handler while intentionally generating
    # warning messages.
    theano_logger.removeHandler(theano.logging_default_handler)
    raised = False
    try:
        f = theano.function(
            [x],
            max_pool_2d(x, ds=(2, 2), ignore_border=True),
            mode=mode_with_gpu.including("cudnn"))
    except (AssertionError, RuntimeError):
        # A failure is only acceptable when cudnn is really missing.
        assert not cuda.dnn.dnn_available()
        raised = True
    finally:
        # Always restore the global config and logging setup.
        theano.config.on_opt_error = saved_on_opt_error
        dnn_logger.removeHandler(handler)
        theano_logger.addHandler(theano.logging_default_handler)

    if raised:
        return

    assert cuda.dnn.dnn_available()
    assert any(isinstance(n.op, cuda.dnn.GpuDnnPool)
               for n in f.maker.fgraph.toposort())
Beispiel #27
0
    def cmp(a_shp, b_shp):
        """Check ``dot(a, .) + c`` on the GPU for the given operand shapes.

        ``a`` and ``c`` are shared variables, ``b`` and ``b2`` are function
        inputs. The compiled function returns ``dot(a, b2) + c`` and updates
        ``a`` to ``dot(a, b) + c``; both results are compared with numpy.
        """
        a_init = my_rand(*a_shp)
        a = tcn.shared_constructor(a_init, 'a')
        c_init = my_rand(a_shp[0], b_shp[1])
        c = tcn.shared_constructor(c_init.copy(), 'c')

        b = tcn.fmatrix('b')
        b2 = tcn.fmatrix('b2')

        output = tensor.dot(a, b2) + c
        new_a = tensor.dot(a, b) + c
        f = pfunc([b, b2], [output], updates=[(a, new_a)],
                  mode=mode_with_gpu)

        # The compiled graph must contain the non-inplace GPU gemm op.
        graph_ops = [node.op for node in f.maker.fgraph.toposort()]
        assert any(op == tcn.blas.gpu_gemm_no_inplace for op in graph_ops)

        bval = my_rand(*b_shp)
        bval2 = my_rand(*b_shp)
        rval = f(bval, bval2)

        assert numpy.allclose(numpy.dot(a_init, bval) + c_init,
                              a.get_value())
        assert numpy.allclose(numpy.dot(a_init, bval2) + c_init, rval)

        # Run once more with `a` holding a [::-1, ::-1] view of its internal
        # array, i.e. a value that is strided in both dimensions.
        a.set_value(a_init)
        strided_view = a.get_value(borrow=True,
                                   return_internal_type=True)[::-1, ::-1]
        a.set_value(strided_view, borrow=True)
        f(bval, bval2)
Beispiel #28
0
def local_gpua_subtensor(node):
    """Decide how a subtensor node should be handled for the GPU.

    When the indexed input is a host copy of a GPU value that was itself
    transferred from a variable without owner (a shared variable or a graph
    input), and that host copy has a single client:

    * return None if a client of the output is the graph output or touches
      a GpuArrayType variable;
    * otherwise return the output wrapped in a GPU round trip
      (``host_from_gpu(gpu_from_host(...))``).

    In every other case return a ``GpuSubtensor`` built from the same
    ``idx_list``.
    """
    x = node.inputs[0]
    if x.owner and isinstance(x.owner.op, HostFromGpu):
        gpu_x = x.owner.inputs[0]
        # True when gpu_x is a plain transfer of a shared var or an input
        # of the graph.
        is_transferred_input = (
            gpu_x.owner and
            isinstance(gpu_x.owner.op, GpuFromHost) and
            not gpu_x.owner.inputs[0].owner)
        if is_transferred_input and len(x.clients) == 1:
            for client, _ in node.outputs[0].clients:
                if client == 'output':
                    return
                touched = client.inputs + client.outputs
                if any(isinstance(v.type, GpuArrayType) for v in touched):
                    return
            return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
Beispiel #29
0
    def compile_args():
        """
        Build the list of command-line flags handed to nvcc.

        These args will be received by compile_str() in the preargs
        parameter. They will also be included in the "hard" part of the
        key module.

        :return: list of str; the user flags from ``config.nvcc.flags``
            followed by the flags added here (fast math, header hash,
            NumPy API macros and, if the user gave none, ``-arch=sm_XY``).
        """
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')
        # Embed the hash of cuda_ndarray.cuh in the flags so that the flag
        # list changes whenever that header changes.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)

        # NumPy 1.7 deprecated the old API. Most places were updated to
        # use the new API, but not all of them. This macro asserts that
        # no new code using the old API is brought in.
        flags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

        # NumPy 1.7 renamed the following macros; the new names do not
        # exist in older versions, so map them back to the old ones.
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if numpy_ver < [1, 7]:
            flags.extend([
                "-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY",
                "-D NPY_ARRAY_ALIGNED=NPY_ALIGNED",
                "-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE",
                "-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL",
                "-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS",
                "-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS",
            ])

        # If the user didn't specify architecture flags add them
        if not any('-arch=sm_' in f for f in flags):
            # We compile cuda_ndarray.cu during import.
            # We should not add device properties at that time.
            # As the device is not selected yet!
            # TODO: re-compile cuda_ndarray when we bind to a GPU?
            import theano.sandbox.cuda
            if hasattr(theano.sandbox, 'cuda'):
                n = theano.sandbox.cuda.use.device_number
                if n is None:
                    # Logger.warn is a deprecated alias of Logger.warning.
                    _logger.warning(
                        "We try to get compilation arguments for CUDA"
                        " code, but the GPU device is not initialized."
                        " This is probably caused by an Op that work on"
                        " the GPU that don't inherit from GpuOp."
                        " We Initialize the GPU now.")
                    theano.sandbox.cuda.use(
                        "gpu",
                        force=True,
                        default_to_move_computation_to_gpu=False,
                        move_shared_float32_to_gpu=False,
                        enable_cuda=False)
                    n = theano.sandbox.cuda.use.device_number
                p = theano.sandbox.cuda.device_properties(n)
                flags.append('-arch=sm_' + str(p['major']) +
                             str(p['minor']))

        return flags
Beispiel #30
0
def filter_nvcc_flags(s):
    """Validate and normalize an ``nvcc.flags`` configuration string.

    The string is split on single spaces and empty pieces are dropped.
    Every remaining flag must start with "-"; otherwise a ValueError is
    raised. Returns the flags joined by exactly one space.
    """
    assert isinstance(s, str)
    flags = list(filter(None, s.split(' ')))
    for flag in flags:
        if flag.startswith("-"):
            continue
        # A bare word means a "--name value" pair was given with a space.
        raise ValueError(
            "Theano nvcc.flags support only parameter/value pairs without"
            " space between them. e.g.: '--machine 64' is not supported,"
            " but '--machine=64' is supported. Please add the '=' symbol."
            " nvcc.flags value is '%s'" % s)
    return ' '.join(flags)
Beispiel #31
0
def local_gpua_subtensor(node):
    """Pick the replacement for a subtensor node.

    Special case: the indexed input ``x`` is a HostFromGpu of a value
    produced by GpuFromHost from a variable without owner (a shared
    variable or an input of the graph), and ``x`` has exactly one client.
    Then:

    * if any client of the node's output is the graph output, or has a
      GpuArrayType variable among its inputs/outputs, return None;
    * else return ``[host_from_gpu(gpu_from_host(output))]``.

    Otherwise return ``GpuSubtensor(node.op.idx_list)``.
    """

    def involves_gpu(client):
        # 'output' marks the graph output; any other client is inspected
        # for GpuArrayType variables.
        if client == 'output':
            return True
        return any(isinstance(var.type, GpuArrayType)
                   for var in client.inputs + client.outputs)

    x = node.inputs[0]
    if x.owner and isinstance(x.owner.op, HostFromGpu):
        gpu_x = x.owner.inputs[0]
        if (gpu_x.owner and isinstance(gpu_x.owner.op, GpuFromHost) and
                # And it is a shared var or an input of the graph.
                not gpu_x.owner.inputs[0].owner and
                len(x.clients) == 1):
            out = node.outputs[0]
            if any(involves_gpu(client) for client, _ in out.clients):
                return
            return [host_from_gpu(gpu_from_host(out))]

    return GpuSubtensor(node.op.idx_list)
Beispiel #32
0
def filter_nvcc_flags(s):
    """Check the ``nvcc.flags`` value and collapse its spacing.

    Splits ``s`` on single spaces, ignores empty pieces, and raises
    ValueError if any remaining piece does not begin with "-". Returns
    the pieces joined by one space.
    """
    assert isinstance(s, str)
    flags = [piece for piece in s.split(' ') if piece != '']
    all_dashed = all(piece.startswith("-") for piece in flags)
    if not all_dashed:
        message = (
            "Theano nvcc.flags support only parameter/value pairs without"
            " space between them. e.g.: '--machine 64' is not supported,"
            " but '--machine=64' is supported. Please add the '=' symbol."
            " nvcc.flags value is '%s'" % s)
        raise ValueError(message)
    return ' '.join(flags)
Beispiel #33
0
def shape_of_variables(fgraph, input_shapes):
    """
    Compute the numeric shape of all intermediate variables given input shapes

    Inputs:
        fgraph - the theano.FunctionGraph in question
        input_shapes - a dict mapping input to shape

    Outputs:
        shapes - a dict mapping variable to shape

    Raises:
        ValueError - if a key of input_shapes is not one of fgraph.inputs

    WARNING : This modifies the fgraph. Not pure.

    >>> import theano
    >>> x = theano.tensor.matrix('x')
    >>> y = x[512:]; y.name = 'y'
    >>> fgraph = theano.FunctionGraph([x], [y], clone=False)
    >>> shape_of_variables(fgraph, {x: (1024, 1024)})
    {y: (512, 1024), x: (1024, 1024)}
    """

    if not hasattr(fgraph, 'shape_feature'):
        fgraph.attach_feature(theano.tensor.opt.ShapeFeature())

    # Reject bad keys before paying for the compilation below.
    if any(inp not in fgraph.inputs for inp in input_shapes):
        raise ValueError(
            "input_shapes keys aren't in the fgraph.inputs. FunctionGraph()"
            " interface changed. Now by default, it clone the graph it receive."
            " To have the old behavior, give him this new parameter `clone=False`."
        )

    shape_of = fgraph.shape_feature.shape_of

    # Flatten the symbolic shapes: one entry per dimension.
    input_dims = [
        dimension for inp in fgraph.inputs for dimension in shape_of[inp]
    ]
    output_dims = [
        dimension for shape in shape_of.values() for dimension in shape
    ]

    compute_shapes = theano.function(input_dims, output_dims)

    numeric_input_dims = [
        dim for inp in fgraph.inputs for dim in input_shapes[inp]
    ]
    numeric_output_dims = compute_shapes(*numeric_input_dims)

    # Map each symbolic dimension to the number computed for it.
    sym_to_num_dict = dict(zip(output_dims, numeric_output_dims))

    shapes = {}
    for var in shape_of:
        shapes[var] = tuple(sym_to_num_dict[sym] for sym in shape_of[var])
    return shapes
Beispiel #34
0
 def get_flags(*types):
     """Return the flag dict describing the dtypes found in ``types``.

     Each argument may be a dtype name (string), a ``Type`` instance or a
     ``Variable`` instance. The result always contains ``cluda=True`` and
     additionally:

     * ``have_double``  if any dtype is float64,
     * ``have_small``   if any dtype has an itemsize below 4 bytes,
     * ``have_complex`` if any dtype is of complex kind,
     * ``have_half``    if any dtype is float16.

     Raises TypeError for an argument of any other kind.
     """
     try:
         string_types = (str, unicode)
     except NameError:
         # Python 3 has no separate ``unicode`` type.
         string_types = (str,)

     def get_dtype(t):
         if isinstance(t, string_types):
             return numpy.dtype(t)
         elif isinstance(t, Type):
             return t.dtype
         elif isinstance(t, Variable):
             return t.type.dtype
         else:
             # Call form of raise: valid on both Python 2 and Python 3.
             raise TypeError("can't get a dtype from %s" % (type(t),))

     dtypes = [get_dtype(t) for t in types]
     flags = dict(cluda=True)
     if any(d == numpy.float64 for d in dtypes):
         flags['have_double'] = True
     if any(d.itemsize < 4 for d in dtypes):
         flags['have_small'] = True
     if any(d.kind == 'c' for d in dtypes):
         flags['have_complex'] = True
     if any(d == numpy.float16 for d in dtypes):
         flags['have_half'] = True
     return flags
Beispiel #35
0
def test_gpu_opt():
    """Check that the multinomial op is put on the GPU when its output is
    moved to the GPU, for both a matrix and a row of probabilities."""
    if not cuda.cuda_available:
        # Skip test if cuda_ndarray is not available.
        from nose.plugins.skip import SkipTest
        raise SkipTest('Optional package cuda not available')

    def check_moved_to_gpu(probs, n_rows):
        # Build the op on the host, request its output on the GPU, and
        # verify the GPU version of the op ends up in the compiled graph.
        sample = multinomial.MultinomialFromUniform('auto')(probs, u)
        assert sample.dtype == 'float32', sample.dtype
        sample_gpu = cuda.gpu_from_host(sample)

        f = function([probs, u], sample_gpu, allow_input_downcast=True,
                     mode=get_mode(True))
        assert any(type(node.op) is multinomial.GpuMultinomialFromUniform
                   for node in f.maker.fgraph.toposort())

        # Run once on normalized probabilities with all uniforms at 0.5.
        pval = numpy.arange(n_rows * 4,
                            dtype='float32').reshape((n_rows, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = numpy.ones_like(pval[:, 0]) * 0.5
        f(pval, uval)

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    check_moved_to_gpu(p, 10000)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    check_moved_to_gpu(r, 1)
Beispiel #36
0
def test_nvidia_driver3():
    """ Test that the gpu device is initialized by theano when
        we build a function with gpu op.

        The driver should always be tested during theano initialization
        of the gpu device
    """
    var = cuda.fvector()
    f = theano.function([var], var + 1, mode=mode_with_gpu, profile=False)
    # The compiled graph must contain a GPU elementwise op ...
    graph_ops = [node.op for node in f.maker.fgraph.toposort()]
    assert any(isinstance(op, cuda.GpuElemwise) for op in graph_ops)
    # ... and a device must have been selected by now.
    assert theano.sandbox.cuda.use.device_number is not None
Beispiel #37
0
def test_local_sampling_dot_csr():
    """Check which sampling-dot op is absent from the compiled graph,
    depending on whether blas ldflags are configured."""
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    default_mode = theano.compile.mode.get_default_mode()
    mode = default_mode.including("specialize", "local_sampling_dot_csr")

    for sp_format in ['csr']:  # Not implemented for other format
        make_sparse = getattr(theano.sparse, sp_format + '_matrix')
        inputs = [tensor.matrix(), tensor.matrix(), make_sparse()]

        f = theano.function(inputs, sparse.sampling_dot(*inputs), mode=mode)

        if theano.config.blas.ldflags:
            # With blas, the generic op must be gone from the graph.
            unwanted_op = sparse.SamplingDot
        else:
            # SamplingDotCSR's C implementation needs blas, so it should not
            # be inserted
            unwanted_op = sparse.opt.SamplingDotCSR
        assert not any(isinstance(node.op, unwanted_op)
                       for node in f.maker.fgraph.toposort())
Beispiel #38
0
def shape_of_variables(fgraph, input_shapes):
    """
    Compute the numeric shape of all intermediate variables given input shapes

    Inputs:
        fgraph - the theano.FunctionGraph in question
        input_shapes - a dict mapping input to shape

    Outputs:
        shapes - a dict mapping variable to shape

    Raises:
        ValueError - if a key of input_shapes is not one of fgraph.inputs

    WARNING : This modifies the fgraph. Not pure.

    >>> import theano
    >>> x = theano.tensor.matrix('x')
    >>> y = x[512:]; y.name = 'y'
    >>> fgraph = theano.FunctionGraph([x], [y], clone=False)
    >>> shape_of_variables(fgraph, {x: (1024, 1024)})
    {y: (512, 1024), x: (1024, 1024)}
    """

    if not hasattr(fgraph, 'shape_feature'):
        fgraph.attach_feature(theano.tensor.opt.ShapeFeature())

    # Check the keys first so a bad call fails before any compilation.
    if any(inp not in fgraph.inputs for inp in input_shapes):
        raise ValueError(
            "input_shapes keys aren't in the fgraph.inputs. FunctionGraph()"
            " interface changed. Now by default, it clones the graph it receives."
            " To have the old behavior, give it this new parameter `clone=False`.")

    shape_of = fgraph.shape_feature.shape_of

    # One symbolic scalar per dimension, for the inputs and for every
    # variable known to the shape feature.
    input_dims = [dimension for inp in fgraph.inputs
                  for dimension in shape_of[inp]]
    output_dims = [dimension for shape in shape_of.values()
                   for dimension in shape]

    compute_shapes = theano.function(input_dims, output_dims)

    numeric_input_dims = [dim for inp in fgraph.inputs
                          for dim in input_shapes[inp]]
    numeric_output_dims = compute_shapes(*numeric_input_dims)

    # Symbolic dimension -> computed number.
    sym_to_num_dict = dict(zip(output_dims, numeric_output_dims))

    shapes = {}
    for var in shape_of:
        shapes[var] = tuple(sym_to_num_dict[sym] for sym in shape_of[var])
    return shapes
Beispiel #39
0
def test_local_csm_properties_csm():
    """Check that csm_properties applied to a freshly built CSC/CSR matrix
    leaves no CSM or CSMProperties node in the compiled graph."""
    data = tensor.vector()
    indices = tensor.ivector()
    indptr = tensor.ivector()
    shape = tensor.ivector()
    symbolic_inputs = [data, indices, indptr, shape]

    default_mode = theano.compile.mode.get_default_mode()
    mode = default_mode.including("specialize", "local_csm_properties_csm")

    formats = [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]
    for CS, cast in formats:
        properties = sparse.csm_properties(CS(data, indices, indptr, shape))
        f = theano.function(symbolic_inputs, properties, mode=mode)
        graph_ops = [node.op for node in f.maker.fgraph.toposort()]
        assert not any(
            isinstance(op, (sparse.CSM, sparse.CSMProperties))
            for op in graph_ops)
        # Run the compiled function on the components of a random matrix.
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Beispiel #40
0
def test_local_mul_s_v():
    """Check that no MulSV node remains in the graph compiled with the
    "local_mul_s_v" optimization included."""
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    default_mode = theano.compile.mode.get_default_mode()
    mode = default_mode.including("specialize", "local_mul_s_v")

    for sp_format in ['csr']:  # Not implemented for other format
        make_sparse = getattr(theano.sparse, sp_format + '_matrix')
        inputs = [make_sparse(), tensor.vector()]

        f = theano.function(inputs, sparse.mul_s_v(*inputs), mode=mode)

        graph_ops = [node.op for node in f.maker.fgraph.toposort()]
        assert not any(isinstance(op, sparse.MulSV) for op in graph_ops)
Beispiel #41
0
    def test_local_hard_sigmoid(self):
        """Check the graph with and without 'local_hard_sigmoid' included."""
        x = tensor.matrix('x')
        s = sigmoid(x)

        # First mode: the graph must stay a single sigmoid node.
        # NOTE(review): presumably get_mode('local_hard_sigmoid') excludes
        # that optimization -- confirm against get_mode's definition.
        f = theano.function([x], s,
                            mode=self.get_mode('local_hard_sigmoid'))
        topo = f.maker.fgraph.toposort()
        assert topo[0].op == sigmoid
        assert len(topo) == 1

        # Second mode: with the optimization included, sigmoid must have
        # been replaced by several other nodes.
        f = theano.function(
            [x], s, mode=self.get_mode().including('local_hard_sigmoid'))
        topo = f.maker.fgraph.toposort()
        assert len(topo) > 1
        assert not any(n.op == sigmoid for n in topo)
        # Make sure the optimized function actually runs.
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
Beispiel #42
0
    def test_neibs_manual(self):
        """Compare images2neibs with hand-written patches, then check that
        neibs2images rebuilds the original images."""
        shape = (2, 3, 4, 4)
        # Each row is one flattened 2x2 patch of the arange-filled images.
        expected_neibs = [
            [0, 1, 4, 5],
            [2, 3, 6, 7],
            [8, 9, 12, 13],
            [10, 11, 14, 15],
            [16, 17, 20, 21],
            [18, 19, 22, 23],
            [24, 25, 28, 29],
            [26, 27, 30, 31],
            [32, 33, 36, 37],
            [34, 35, 38, 39],
            [40, 41, 44, 45],
            [42, 43, 46, 47],
            [48, 49, 52, 53],
            [50, 51, 54, 55],
            [56, 57, 60, 61],
            [58, 59, 62, 63],
            [64, 65, 68, 69],
            [66, 67, 70, 71],
            [72, 73, 76, 77],
            [74, 75, 78, 79],
            [80, 81, 84, 85],
            [82, 83, 86, 87],
            [88, 89, 92, 93],
            [90, 91, 94, 95],
        ]
        for dtype in self.dtypes:
            pixels = numpy.arange(numpy.prod(shape), dtype=dtype)
            images = shared(pixels.reshape(shape))
            neib_shape = T.as_tensor_variable((2, 2))

            for border in ['valid', 'ignore_borders']:
                f = function([],
                             images2neibs(images, neib_shape, mode=border),
                             mode=self.mode)
                # The op under test must be present in the compiled graph.
                assert any(isinstance(node.op, self.op)
                           for node in f.maker.fgraph.toposort())

                neibs = f()
                assert numpy.allclose(neibs, expected_neibs)

                # Round trip: the patches must rebuild the original images.
                g = function([],
                             neibs2images(neibs, neib_shape, images.shape),
                             mode=self.mode)

                assert numpy.allclose(images.get_value(borrow=True), g())
Beispiel #43
0
def test_local_mul_s_v():
    """Compile mul_s_v with "local_mul_s_v" included and check that the
    MulSV op does not appear in the resulting graph."""
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    mode = theano.compile.mode.get_default_mode().including(
        "specialize", "local_mul_s_v")

    for sp_format in ['csr']:  # Not implemented for other format
        sparse_input = getattr(theano.sparse, sp_format + '_matrix')()
        vector_input = tensor.vector()
        inputs = [sparse_input, vector_input]

        f = theano.function(inputs, sparse.mul_s_v(*inputs), mode=mode)

        for node in f.maker.fgraph.toposort():
            assert not isinstance(node.op, sparse.MulSV)
Beispiel #44
0
def test_local_csm_properties_csm():
    """For both CSC and CSR, compile csm_properties of a constructed sparse
    matrix and check that neither CSM nor CSMProperties stays in the graph."""
    data = tensor.vector()
    indices = tensor.ivector()
    indptr = tensor.ivector()
    shape = tensor.ivector()

    mode = theano.compile.mode.get_default_mode().including(
        "specialize", "local_csm_properties_csm")

    for CS, cast in [(sparse.CSC, sp.csc_matrix),
                     (sparse.CSR, sp.csr_matrix)]:
        built = CS(data, indices, indptr, shape)
        f = theano.function([data, indices, indptr, shape],
                            sparse.csm_properties(built),
                            mode=mode)
        for node in f.maker.fgraph.toposort():
            assert not isinstance(node.op,
                                  (sparse.CSM, sparse.CSMProperties))
        # Feed the compiled function the parts of a random sparse matrix.
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Beispiel #45
0
 def __str__(self):
     if self.name:
         return self.name
     else:
         b = self.broadcastable
         named_broadcastable = {(): 'scalar',
                  (False,): 'vector',
                  (False, True): 'col',
                  (True, False): 'row',
                  (False, False): 'matrix'}
         if b in named_broadcastable:
             bcast = named_broadcastable[b]
         else:
             if any(b):
                 bcast = str(b)
             else:
                 bcast = '%iD' % len(b)
         return "TensorType(%s, %s)" % (str(self.dtype), bcast)
Beispiel #46
0
 def __str__(self):
     if self.name:
         return self.name
     else:
         b = self.broadcastable
         named_broadcastable = {
             (): 'scalar',
             (False, ): 'vector',
             (False, True): 'col',
             (True, False): 'row',
             (False, False): 'matrix'
         }
         if b in named_broadcastable:
             bcast = named_broadcastable[b]
         else:
             if any(b):
                 bcast = str(b)
             else:
                 bcast = '%iD' % len(b)
         return "TensorType(%s, %s)" % (str(self.dtype), bcast)
Beispiel #47
0
def local_gpu_multinomial(node):
    """Replace a float32 MultinomialFromUniform by its GPU version.

    Two situations are handled:

    * the node is the multinomial op and at least one of its inputs comes
      from a HostFromGpu: the GPU op is applied to inputs moved to the GPU
      and its transposed result is brought back to the host;
    * the node is a GpuFromHost whose input is produced by the multinomial
      op: the GPU op is used and the transposed result is moved to the GPU.

    Returns a one-element list with the replacement, or None when nothing
    applies.
    """
    cuda = theano.sandbox.cuda

    if type(node.op) is MultinomialFromUniform:
        p, u = node.inputs
        m, = node.outputs
        all_float32 = p.dtype == u.dtype == m.dtype == 'float32'
        if all_float32 and any(
                i.owner and isinstance(i.owner.op, cuda.HostFromGpu)
                for i in node.inputs):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
            gpu_inputs = [gpu_from_host(i) for i in node.inputs]
            return [host_from_gpu(gpu_op(*gpu_inputs)).T]

    if not isinstance(node.op, cuda.GpuFromHost):
        return
    multi = node.inputs[0].owner
    if not (multi and type(multi.op) is MultinomialFromUniform):
        return

    p, u = multi.inputs
    m, = multi.outputs
    if p.dtype == u.dtype == m.dtype == 'float32':
        gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
        gpu_inputs = [gpu_from_host(i) for i in multi.inputs]
        ret = gpu_op(*gpu_inputs).T
        # The dimshuffle is on the cpu, but will be moved to the
        # gpu by an opt.
        return [gpu_from_host(ret)]
Beispiel #48
0
        def local_opt(node):
            """Try to replace ``node`` by the result of ``maker(node)``.

            Returns None when the optimization is cuda-only and the device
            is not a cuda one, a list of replacement outputs on success,
            and False otherwise.
            """
            dev = theano.sandbox.gpuarray.init_dev.device
            if cuda_only and not dev.startswith('cuda'):
                return

            if type(node.op) not in OP:
                return False

            # Either one of our inputs is on the gpu or
            # all of our client are on the gpu
            worth_moving = (
                any(i.owner and i.owner.op == host_from_gpu
                    for i in node.inputs) or
                all(c != 'output' and c.op == gpu_from_host
                    for c, idx in node.outputs[0].clients))
            if not worth_moving:
                return False

            new_op = maker(node)
            # This is needed as sometimes new_op inherit from OP.
            if not (new_op and new_op != node.op):
                return False

            if isinstance(new_op, theano.Op):
                outputs = new_op(*node.inputs, return_list=True)
                return [safe_to_cpu(o) for o in outputs]
            if isinstance(new_op, (tuple, list)):
                return [safe_to_cpu(o) for o in new_op]
            # suppose it is a variable on the GPU
            return [host_from_gpu(new_op)]
Beispiel #49
0
def test_local_csm_grad_c():
    """Check that "local_csm_grad_c" removes CSMGrad from the gradient
    graph. Currently always skipped (see the first statement)."""
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    # Everything below is unreachable while the skip above is in place.
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices = tensor.ivector()
    indptr = tensor.ivector()
    shape = tensor.ivector()
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    formats = [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]
    for CS, cast in formats:
        dense = sparse.DenseFromSparse()(CS(data, indices, indptr, shape))
        cost = tensor.sum(dense)
        f = theano.function([data, indices, indptr, shape],
                            tensor.grad(cost, data),
                            mode=mode)
        graph_ops = [node.op for node in f.maker.fgraph.toposort()]
        assert not any(isinstance(op, sparse.CSMGrad) for op in graph_ops)
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Beispiel #50
0
    def body(mode, gpu):
        """Compile ``m*2`` in ``mode`` and check the drawn samples.

        When ``gpu`` is true, also require the GPU multinomial op in the
        compiled graph.
        """
        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
        if gpu:
            graph_ops = [node.op for node in f.maker.fgraph.toposort()]
            assert any(type(op) is multinomial.GpuMultinomialFromUniform
                       for op in graph_ops)

        # test that both first and second samples can be drawn
        assert numpy.allclose(f([[1, 0], [0, 1]], [.1, .1]),
                              [[2, 0], [0, 2]])

        cases = [
            # test that both second labels can be drawn
            ([[.2, .8], [.3, .7]], [.31, .31], [[0, 2], [0, 2]]),
            # test that both first labels can be drawn
            ([[.2, .8], [.3, .7]], [.21, .21], [[0, 2], [2, 0]]),
            # change the size to make sure output gets reallocated ok
            # and also make sure that the GPU version doesn't screw up the
            # transposed-ness
            ([[.2, .8]], [.25], [[0, 2]]),
        ]
        for pvals, uvals, expected in cases:
            r = f(pvals, uvals)
            assert numpy.allclose(r, expected), r
Beispiel #51
0
def test_local_csm_grad_c():
    """Test that CSMGrad is absent from the gradient graph compiled with
    "local_csm_grad_c". The test is unconditionally skipped for now."""
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    # Unreachable until the skip above is removed.
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = [tensor.ivector() for _ in range(3)]
    symbolic_inputs = [data, indices, indptr, shape]

    if theano.config.mode == 'FAST_COMPILE':
        base_mode = theano.compile.mode.get_default_mode()
        base_mode = theano.compile.Mode(linker='c|py',
                                        optimizer='fast_compile')
    else:
        base_mode = theano.compile.mode.get_default_mode()
    mode = base_mode.including("specialize", "local_csm_grad_c")

    for CS, cast in [(sparse.CSC, sp.csc_matrix), (sparse.CSR, sp.csr_matrix)]:
        sparse_var = CS(data, indices, indptr, shape)
        cost = tensor.sum(sparse.DenseFromSparse()(sparse_var))
        f = theano.function(symbolic_inputs,
                            tensor.grad(cost, data),
                            mode=mode)
        for node in f.maker.fgraph.toposort():
            assert not isinstance(node.op, sparse.CSMGrad)
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Beispiel #52
0
def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
            iters=10, order='C'):
    """
    :param execute: If True, execute a Theano function that should call gemm.
    :param verbose: If True, will print some Theano flags and env variables.
    :param M,N,K: The M,N,K size used by gemm.
    :param iters: The number of calls to gemm to do.
    :param order: The ``order`` argument given to numpy.ones when creating
                  the three matrices.

    :return: a tuple (execution time,
                      str that represents the implementation used).
             The execution time is -1 when `execute` is False.
    """

    if verbose:
        # Single-argument print(...) calls produce the same text on
        # Python 2 (print statement) and Python 3 (print function).
        print('Some Theano flags:')
        print('    blas.ldflags= %s' % (theano.config.blas.ldflags,))
        print('    compiledir= %s' % (theano.config.compiledir,))
        print('    floatX= %s' % (theano.config.floatX,))
        print('    device= %s' % (theano.config.device,))
        print('Some OS information:')
        print('    sys.platform= %s' % (sys.platform,))
        print('    sys.version= %s' % (sys.version,))
        print('    sys.prefix= %s' % (sys.prefix,))
        print('Some environment variables:')
        print('    MKL_NUM_THREADS= %s' % (os.getenv('MKL_NUM_THREADS'),))
        print('    OMP_NUM_THREADS= %s' % (os.getenv('OMP_NUM_THREADS'),))
        print('    GOTO_NUM_THREADS= %s' % (os.getenv('GOTO_NUM_THREADS'),))
        print('')
        print('Numpy config: (used when the Theano flag'
              ' "blas.ldflags" is empty)')
        numpy.show_config()
        print('Numpy dot module: %s' % (numpy.dot.__module__,))
        print('Numpy location: %s' % (numpy.__file__,))
        print('Numpy version: %s' % (numpy.__version__,))
        if (theano.config.device.startswith("gpu") or
                theano.config.init_gpu_device.startswith("gpu")):
            print('nvcc version:')
            subprocess.call((theano.sandbox.cuda.nvcc_compiler.nvcc_path,
                             "--version"))
            print('')

    a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
                                 order=order))
    b = theano.shared(numpy.ones((N, K), dtype=theano.config.floatX,
                                 order=order))
    c = theano.shared(numpy.ones((M, K), dtype=theano.config.floatX,
                                 order=order))
    # c <- 0.4 * c + 0.8 * dot(a, b): the pattern expected to become gemm.
    f = theano.function([], updates=[(c, 0.4 * c + .8 * T.dot(a, b))])

    # Identify the implementation from the op class names in the graph.
    op_names = [x.op.__class__.__name__ for x in f.maker.fgraph.toposort()]
    if 'Gemm' in op_names:
        # A thunk carrying a 'cthunk' attribute is taken to be the C
        # implementation of the op.
        c_impl = [hasattr(thunk, 'cthunk')
                  for node, thunk in zip(f.fn.nodes, f.fn.thunks)
                  if node.op.__class__.__name__ == "Gemm"]
        assert len(c_impl) == 1
        if c_impl[0]:
            impl = 'CPU (with direct Theano binding to blas)'
        else:
            impl = 'CPU (without direct Theano binding to blas but with numpy/scipy binding to blas)'
    elif 'GpuGemm' in op_names:
        impl = 'GPU'
    else:
        impl = 'ERROR, unable to tell if Theano used the cpu or the gpu:\n'
        impl += str(f.maker.fgraph.toposort())

    # Defaults chosen so that the reported time is -1 when not executing.
    t0 = 0
    t1 = -1

    if execute:
        sync = (hasattr(theano, "sandbox") and
                hasattr(theano.sandbox, "cuda") and
                theano.sandbox.cuda.cuda_available)
        t0 = time.time()
        for i in range(iters):
            f()
        if sync:
            # Synchronize with the GPU before reading the end time.
            theano.sandbox.cuda.synchronize()
        t1 = time.time()
    return t1 - t0, impl
Beispiel #53
0
 def depends(pair):
     """ Returns True if a depends on b

     ``pair`` is the 2-tuple ``(a, b)``. ``a`` depends on ``b`` when an
     output of ``b`` is an input of ``a``, or when the owner of one of
     ``a``'s inputs depends on ``b``.
     """
     # Unpack in the body: tuple parameters in the signature are not
     # valid syntax on Python 3.
     a, b = pair
     return (any(bout in a.inputs for bout in b.outputs)
             or any(depends((ainp.owner, b)) for ainp in a.inputs
                    if ainp.owner))
Beispiel #54
0
def test_downsample():
    """Check the GPU max-pooling ops against their CPU counterparts.

    For every (shape, pooling factor, ignore_border) combination that is
    not skipped, the forward pass, its gradient and the Lop of the
    gradient are each compiled once with the GPU mode and once with the
    CPU mode.  Every compiled graph must contain the expected op, and
    the GPU and CPU results must agree under ``numpy.allclose``.
    """
    shapes = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (65536, 1, 10, 10),
        (1, 65536, 10, 10),
    ]

    # Visit the shapes in an order determined by the unit-test seed.
    rng = numpy.random.RandomState(unittest_tools.fetch_seed())
    rng.shuffle(shapes)

    def contains_op(compiled, op_class):
        # True when some node of the compiled graph applies `op_class`.
        return any(isinstance(node.op, op_class)
                   for node in compiled.maker.fgraph.toposort())

    for shape in shapes:
        for factor in ((2, 2), (3, 2), (1, 1)):
            # Skip pooling factors larger than the last two dimensions.
            if factor[0] > shape[2] or factor[1] > shape[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512
            # columns in the output tensor.
            if float(shape[3]) / factor[1] > 512:
                continue

            for ignore_border in (True, False):
                pool = DownsampleFactorMax(factor,
                                           ignore_border=ignore_border)
                data = tcn.shared_constructor(my_rand(*shape), 'a')

                # --- forward pass ---
                fwd_gpu_out = pool(tensor.as_tensor_variable(data))
                fwd_gpu = pfunc([], fwd_gpu_out,
                                mode=mode_with_gpu.excluding('cudnn'))
                fwd_cpu_out = pool(tensor.as_tensor_variable(data))
                fwd_cpu = pfunc([], fwd_cpu_out, mode=mode_without_gpu)

                assert contains_op(fwd_gpu, tcn.blas.GpuDownsampleFactorMax)
                assert contains_op(fwd_cpu, DownsampleFactorMax)
                assert numpy.allclose(fwd_gpu(), fwd_cpu())

                # The gradient is too slow on a GT220 GPU and makes the
                # computer freeze, so the remaining checks are bypassed
                # for the very large shapes.  Remove this once the
                # gradient is optimized enough.  Those tests were passing
                # in all modes on a GTX470.
                if shape[0] > 30000 or shape[1] > 30000:
                    continue

                # --- gradient ---
                cost_gpu = pool(tensor.as_tensor_variable(data)).sum()
                grad_gpu = pfunc([], tensor.grad(cost_gpu, data),
                                 mode=mode_with_gpu.excluding('cudnn'))
                cost_cpu = pool(tensor.as_tensor_variable(data)).sum()
                grad_cpu = pfunc([], tensor.grad(cost_cpu, data),
                                 mode=mode_without_gpu)

                assert contains_op(grad_gpu,
                                   tcn.blas.GpuDownsampleFactorMaxGrad)
                assert contains_op(grad_cpu, DownsampleFactorMaxGrad)
                assert numpy.allclose(grad_gpu(), grad_cpu()), shape

                # --- Lop of the gradient of the squared output ---
                squared_cost = (
                    pool(tensor.as_tensor_variable(data)) ** 2).sum()
                grad_of_grad = gradient.Lop(
                    tensor.grad(squared_cost, data), data, data)

                # Work on copies of the modes with check_py_code turned
                # off, leaving the shared mode objects untouched.
                cpu_mode = copy.copy(mode_without_gpu)
                cpu_mode.check_py_code = False
                gpu_mode = copy.copy(mode_with_gpu)
                gpu_mode.check_py_code = False
                gradgrad_gpu = pfunc([], grad_of_grad, mode=gpu_mode)
                gradgrad_cpu = pfunc([], grad_of_grad, mode=cpu_mode)

                assert contains_op(gradgrad_gpu,
                                   tcn.blas.GpuDownsampleFactorMaxGradGrad)
                assert contains_op(gradgrad_cpu, DownsampleFactorMaxGradGrad)
                assert numpy.allclose(gradgrad_gpu(), gradgrad_cpu()), shape