Example #1
def sample():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z1 = sym.elemwise_add(x, sym.sqrt(y))
    z2 = sym.log(x)
    gradient = graph_util.gradients([z1, z2], [x, y])
    print(gradient)
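The snippets on this page are shown without their imports. A minimal sketch of the header they appear to assume (module paths inferred from the NNVM/TVM test suite, so treat them as assumptions rather than the original files' preambles):

# Assumed common imports for the examples below.
import numpy as np
import tvm
import nnvm
import nnvm.compiler
import nnvm.symbol as sym
from nnvm import symbol, graph
from nnvm.compiler import graph_util
from nnvm.testing.config import ctx_list
from nnvm.testing.check_computation import check_function
from tvm.contrib import graph_runtime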
Example #2
def test_unary():
    x = sym.Variable('x')
    x = sym.exp(x)
    x = sym.log(x)
    x = sym.sigmoid(x)
    x = sym.tanh(x)
    x = sym.relu(x)
    assert x.list_input_names() == ['x']
Example #3
def test_log():
    x = sym.Variable("x")
    y = sym.log(x)

    def forward(x):
        return np.log(x)

    def backward(head_grads, x):
        return [1. / x * head_grads]

    shape = {'x': (1, 3, 32, 32)}
    check_function(y, forward, backward, in_range=(0.002, 2.0), shape=shape)
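The same check_function pattern extends to other unary ops; here is a minimal sketch for sym.sqrt (an illustrative addition, not one of the collected examples; the analytic gradient of sqrt(x) is 0.5 / sqrt(x)):

def check_sqrt():
    x = sym.Variable("x")
    y = sym.sqrt(x)

    def forward(x):
        return np.sqrt(x)

    def backward(head_grads, x):
        # d/dx sqrt(x) = 0.5 / sqrt(x)
        return [0.5 / np.sqrt(x) * head_grads]

    shape = {'x': (1, 3, 32, 32)}
    check_function(y, forward, backward, in_range=(0.002, 2.0), shape=shape)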
Example #4
def test_log():
    x = sym.Variable("x")
    y = sym.log(x)

    def forward(x):
        return np.log(x)

    def backward(head_grads, x):
        return [1. / x * head_grads]

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = [('x', dshape, x)]
    helper(y, inputs, dtype, forward, backward, rnd_min=0.001)
Example #5
def test_log():
    x = sym.Variable("x")
    y = sym.log(x)

    def forward(x):
        return np.log(x)

    def backward(x):
        return 1. / x

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)
Example #6
    def test_fusible_network():
        """ The network is as following:
                    data
                      |
                     exp
                    /   \
                 sqrt   log
                    \   /
                    b_add
                      |
                    tanh
        """
        batch_size = 1
        data_shape = (batch_size, 3, 224, 224)
        data = symbol.Variable('data', shape=data_shape, dtype="float32")
        shape_dict = {"data": data_shape}
        params = {}
        params["data"] = np.random.uniform(-1, 1,
                                           size=data_shape).astype("float32")

        exp = symbol.exp(data, name='exp')
        sqrt = symbol.sqrt(exp, name='sqrt')
        log = symbol.log(exp, name='log')
        ret = sqrt + log
        ret = symbol.tanh(ret)

        # Fuse log and broadcast_add.
        check_annotated_graph(ret, ['exp', 'log', 'broadcast_add'], 8,
                              shape_dict,
                              params)

        # Fuse log, broadcast_add, and tanh
        check_annotated_graph(ret, ['exp', 'sqrt', 'none', 'elemwise_add'], 6,
                              shape_dict, params)

        # No operator will be fused.
        check_annotated_graph(ret, ['log', 'sqrt', 'none', 'tanh'], 11,
                              shape_dict, params)

        # All operators will be fused.
        check_annotated_graph(ret, [''], 2, shape_dict, params)

        # All operators will be fused since all of them are annotated to the
        # same device.
        check_annotated_graph(ret,
                              ['exp', 'sqrt', 'broadcast_add', 'none', 'log',
                               'tanh'], 2, shape_dict, params)

        # Fuse exp, sqrt, log, and broadcast_add
        check_annotated_graph(ret, ['tanh'], 4, shape_dict, params)
Example #7
def test_log():
    x = sym.Variable("x")
    y = sym.log(x)
    dtype = "float32"
    dshape = (1, 3, 32, 32)
    oshape = dshape
    for target, ctx in ctx_list():
        with nnvm.compiler.build_config(opt_level=1):
            graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = graph_runtime.create(graph, lib, ctx)
        data = np.random.uniform(size=dshape).astype(dtype)
        m.run(x=data)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        y_np = np.log(data)
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)
Example #8
def test_gradient():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z1 = sym.elemwise_add(x, sym.sqrt(y))
    z2 = sym.log(x)
    gradient = graph_util.gradients([z1, z2], [x, y])
    assert len(gradient) == 2

    g1 = sym.Variable("g1")
    g2 = sym.Variable("g2")
    grad_ys = [g1, g2]
    gradient = graph_util.gradients(sym.Group([z1, z2]),
                                    sym.Group([x, y]), grad_ys=grad_ys)
    g_graph = graph.create(sym.Group(gradient)).ir()
    assert len(gradient) == 2
    assert "g1" in g_graph
    assert "g2" in g_graph
Example #9
def test_create_full_graph():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z1 = sym.elemwise_add(x, sym.sqrt(y))
    z2 = sym.log(x)
    symbol = sym.Group([z1, z2])
    compute_graph = graph.create(symbol, need_backward=True)
    assert (compute_graph.index.num_nodes == 11)

    head_grads = [sym.Variable("g1"), sym.Variable("g2")]
    compute_graph = graph.create(symbol,
                                 need_backward=True,
                                 head_grads=head_grads)
    ir = compute_graph.ir()
    assert (compute_graph.index.num_nodes == 11)
    assert ("g1" in ir)
    assert ("g2" in ir)

    fixed_args = ["x"]
    compute_graph = graph.create(symbol,
                                 need_backward=True,
                                 fixed_args=fixed_args)
    assert (compute_graph.index.num_nodes == 8)
Example #10
def test_fusible_network(device, target):
    R""" The network is as following:
                data
                  |
                 exp
                /   \
             sqrt   log
                \   /
                b_add
                  |
                tanh
    """
    if not tvm.module.enabled(device):
        print("Skip test because %s is not enabled." % device)
        return

    batch_size = 1
    data_shape = (batch_size, 3, 224, 224)
    data = symbol.Variable('data', shape=data_shape, dtype="float32")
    shape_dict = {"data": data_shape}
    params = {}
    params["data"] = np.random.uniform(-1, 1,
                                       size=data_shape).astype("float32")

    exp = symbol.exp(data, name='exp')
    sqrt = symbol.sqrt(exp, name='sqrt')
    log = symbol.log(exp, name='log')
    ret = sqrt + log
    ret = symbol.tanh(ret)

    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", device: target}

    # Fuse log and broadcast_add.
    op_name_device = {
        "exp": "cpu",
        "log": "cpu",
        "broadcast_add": "cpu",
        "sqrt": device,
        "elemwise_add": device,
        "tanh": device
    }
    check_annotated_graph(ret, target, op_name_device, 8, fallback_device,
                          shape_dict, params)

    # Fuse log, broadcast_add, and tanh
    op_name_device = {
        "exp": "cpu",
        "log": device,
        "broadcast_add": device,
        "sqrt": "cpu",
        "elemwise_add": "cpu",
        "tanh": device
    }
    check_annotated_graph(ret, target, op_name_device, 6, fallback_device,
                          shape_dict, params)

    # No operator will be fused.
    op_name_device = {
        "exp": device,
        "log": "cpu",
        "broadcast_add": device,
        "sqrt": "cpu",
        "elemwise_add": device,
        "tanh": "cpu"
    }
    check_annotated_graph(ret, target, op_name_device, 11, fallback_device,
                          shape_dict, params)

    # All operators will be fused.
    op_name_device = {
        "exp": device,
        "log": device,
        "broadcast_add": device,
        "sqrt": device,
        "elemwise_add": device,
        "tanh": device
    }
    check_annotated_graph(ret, target, op_name_device, 2, fallback_device,
                          shape_dict, params)

    # All operators will be fused since all of them are annotated to the
    # same device.
    op_name_device = {
        "exp": "cpu",
        "log": "cpu",
        "broadcast_add": "cpu",
        "sqrt": "cpu",
        "elemwise_add": "cpu",
        "tanh": "cpu"
    }
    check_annotated_graph(ret, target, op_name_device, 2, fallback_device,
                          shape_dict, params)

    # Fuse exp, sqrt, log, and broadcast_add
    op_name_device = {
        "exp": device,
        "log": device,
        "broadcast_add": device,
        "sqrt": device,
        "elemwise_add": device,
        "tanh": "cpu"
    }
    check_annotated_graph(ret, target, op_name_device, 4, fallback_device,
                          shape_dict, params)
Example #11
def test_check_function():
    # test the testing function

    x = sym.Variable("x")
    y = sym.Variable("y")

    # different styles of returning gradients from the backward function
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [head_grads, 2 * head_grads],
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: (head_grads, 2 * head_grads),
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {
                       'x': head_grads,
                       'y': 2 * head_grads
                   },
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: {'y': 2 * head_grads},
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: [2 * head_grads],
                   grad_input_vars=[y],
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32')
    check_function(x + 2 * y,
                   lambda x, y: x + 2 * y,
                   lambda x, y, head_grads: 2 * head_grads,
                   grad_input_vars=[y],
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float64')

    # test just numerical gradients
    # different styles of shape and dtype passing
    check_function(x + 2 * y,
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   numerical_grads=True)
    check_function(x + 2 * y,
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype='float32',
                   numerical_grads=True)
    check_function(x + 2 * y,
                   shape={
                       'x': (1, 2),
                       y: (1, 2)
                   },
                   dtype={
                       x: 'float32',
                       'y': 'float32'
                   },
                   numerical_grads=True)
    check_function(x + 2 * y,
                   shape=(1, 2),
                   dtype='float32',
                   numerical_grads=True)

    # specifying variable attributes on variable creation
    # (in this case type codes must be used)
    x = sym.Variable("x", dtype=0, shape=(1, 2))
    check_function(x + 2 * y,
                   shape={y: (1, 2)},
                   dtype={'y': 'float32'},
                   numerical_grads=True)
    y = sym.Variable("y", dtype=0, shape=(1, 2))

    # shape overriding
    def _fwd1(x, y):
        assert x.shape == (1, 1)
        assert y.shape == (1, 2)
        return x + 2 * y

    check_function(x + 2 * y, _fwd1, shape={x: (1, 1)})

    # in_range
    def _fwd2(x, y):
        assert x.shape == (100, )
        assert (x <= 0.9).all()
        assert (x >= 0.8).all()
        return x + 2 * y

    check_function(x + 2 * y,
                   _fwd2,
                   shape=(100, ),
                   in_range=(0.8, 0.9),
                   numerical_grads=False)
    check_function(x + 2 * y,
                   _fwd2,
                   shape=(100, ),
                   in_range={'x': (0.8, 0.9)},
                   numerical_grads=False)
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   in_range={'head_grads_0': (1.0, 1.0)})
    # explicit passing of values
    check_function(x + 2 * y,
                   backward=lambda x, y, head_grads: [1.0, 2.0],
                   values={'head_grads_0': np.full((1, 2), 1.0)})

    # check that the function reports errors
    def _check_function_must_fail(*args, **kwargs):
        error = AssertionError
        if 'error' in kwargs:
            error = kwargs['error']
            del kwargs['error']
        try:
            check_function(*args, quiet=True, **kwargs)
        except error:
            pass
        else:
            raise AssertionError("check_function didn't raise an exception")

    _check_function_must_fail(x + 2 * y, error=ValueError)
    _check_function_must_fail(x + 2 * y, lambda x, y: x + y)
    _check_function_must_fail(x + 2 * y,
                              backward=lambda x, y, head_grads: [1.0, 2.0])
    _check_function_must_fail(sym.block_grad(x + 2 * y), numerical_grads=True)
    _check_function_must_fail(x * x,
                              numerical_grads=True,
                              numerical_grads_params={
                                  'atol': 0.0,
                                  'rtol': 0.0
                              })
    _check_function_must_fail(sym.log(-x * x),
                              numerical_grads=True,
                              error=ValueError)

    # different styles of returning results from the forward function
    check_function(x + 2 * y, lambda x, y: [x + 2 * y], numerical_grads=False)
    _check_function_must_fail(x + 2 * y,
                              lambda x, y: [x + 2 * y, x],
                              numerical_grads=False,
                              error=ValueError)
    _check_function_must_fail(x + 2 * y,
                              lambda x, y: [],
                              numerical_grads=False,
                              error=ValueError)

    # multiple outputs
    z = sym.Group([2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y])
    check_function(z, lambda x, y: (2 * x + y, x + 2 * y))
    check_function(
        z,
        backward=lambda x, y, head_grads:
        [2 * head_grads[0] + head_grads[1], head_grads[0] + 2 * head_grads[1]])
    _check_function_must_fail(z,
                              backward=lambda x, y, head_grads:
                              [2 * head_grads[0], 2 * head_grads[1]])
    check_function(
        z,
        backward=lambda x, y, head_grads: [head_grads[1], 2 * head_grads[1]],
        in_range={'head_grads_0': (0, 0)})
    check_function(z, numerical_grads=True)

    z = sym.Group([sym.block_grad(2 * x + y), x + 2 * y])
    check_function(z,
                   lambda x, y: [2 * x + y, x + 2 * y],
                   numerical_grads=False)
    _check_function_must_fail(z, lambda x, y: [2 * x + y, x + 2 * y])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, sym.block_grad(x + 2 * y)])
    _check_function_must_fail(z, numerical_grads=True)

    z = sym.Group([2 * x + y, x + 2 * y, x, y, sym.sum(x)])
    check_function(z, lambda x, y: [2 * x + y, x + 2 * y, x, y, np.sum(x)])

    # passing additional parameters to forward and backward
    def _fwd3(x, p):
        assert p == 'v'
        return x + 1

    def _bwd3(x, p, head_grads):
        assert p == 'v'
        return head_grads

    check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})

    # implicitly created variables and shape/dtype inference for inputs
    x = sym.Variable("x", shape=(2, 3), dtype=0)
    b = sym.Variable("b")
    y = sym.dense(data=x, bias=b, units=4)
    # Don't check gradients on cuda because it doesn't yet support ewise after reduce
    check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
    check_function(y,
                   shape={'x': (3, 4)},
                   exclude_targets={'cuda'},
                   numerical_grads=True)
    check_function(y,
                   dtype={'x': 'float64'},
                   exclude_targets={'cuda'},
                   numerical_grads=True)

    x = sym.Variable("x")
    b = sym.Variable("b")
    w = sym.Variable("w")
    y = sym.dense(data=x, bias=b, weight=w, units=4)

    def _fwd_dense(x, w, b):
        return np.dot(x, w.T) + b

    check_function(y,
                   _fwd_dense,
                   shape={'x': (1, 2)},
                   dtype={'x': 'float32'},
                   numerical_grads=False)
    check_function(y,
                   _fwd_dense,
                   shape={'x': (1, 2)},
                   dtype={'w': 'float64'},
                   numerical_grads=False)
    _check_function_must_fail(y,
                              _fwd_dense,
                              shape={'x': (1, 2)},
                              dtype={
                                  'w': 'float64',
                                  'b': 'float32'
                              },
                              numerical_grads=False,
                              error=nnvm._base.NNVMError)
    # fails because no shape
    _check_function_must_fail(y,
                              _fwd_dense,
                              numerical_grads=False,
                              error=ValueError)
    # ok because type is float32 by default
    check_function(y, _fwd_dense, shape={'x': (1, 2)}, numerical_grads=False)