Example #1
def inplace_function_test_helper(inputs,
                                 func,
                                 func_args=[],
                                 func_kwargs={},
                                 ctx=None,
                                 rng=None):
    if rng is None:
        rng = np.random.RandomState(313)
    if ctx is None:
        ctx = nn.Context()
    with nn.context_scope(ctx):
        a_s = [inp * 1.0 for inp in inputs]
        y = func(*(a_s + list(func_args)), inplace=False, **func_kwargs)
        l = F.sum(y)
        a_s_i = [inp * 1.0 for inp in inputs]
        y_i = func(*(a_s_i + list(func_args)), inplace=True, **func_kwargs)
        l_i = F.sum(y_i)
    data = [(rng.randn(*inp.shape), rng.randn(*inp.shape)) for inp in inputs]
    for i in range(len(data)):
        inputs[i].d = data[i][0]
        inputs[i].g = data[i][1]
    l.forward()
    l.backward()
    grads = [inp.g.copy() for inp in inputs]
    for i in range(len(data)):
        inputs[i].d = data[i][0]
        inputs[i].g = data[i][1]
    l_i.forward()
    l_i.backward()
    grads_i = [inp.g.copy() for inp in inputs]
    for g, g_i in zip(grads, grads_i):
        assert np.allclose(g, g_i), str(ArrayDiffStats(g, g_i))
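
A minimal usage sketch of this helper (hypothetical; it assumes `np`, `nn`, and `F` are imported as usual, and that the tested function, like `F.relu` here, accepts an `inplace` keyword):

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Verify that the in-place and out-of-place variants of F.relu
# produce identical input gradients.
x = nn.Variable([2, 3], need_grad=True)
inplace_function_test_helper([x], F.relu)
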
Example #2
def list_context(func_name):
    try:
        import list_context_ext
        return list_context_ext.list(func_name)
    except Exception as e:
        print(e)
        return [(nn.Context(), func_name)]
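
A sketch of how such a helper is typically consumed in a pytest module (hypothetical test body; it assumes the function under test is registered as 'ReLU'):

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F

# One (context, function name) pair per available backend.
ctxs = list_context('ReLU')

@pytest.mark.parametrize("ctx, func_name", ctxs)
def test_relu_forward(ctx, func_name):
    x = nn.Variable.from_numpy_array(np.random.randn(2, 3))
    with nn.context_scope(ctx):
        y = F.relu(x)
    y.forward()
    assert np.all(y.d >= 0)
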
Example #3
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 8)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 8)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diffs should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert_allclose(ngrad, agrad, atol=1.05e-2)
Example #4
def context(type_config='float', **kw):
    """CPU Context."""
    backends = ['cpu:float']
    if type_config == 'half':
        backends = ['cpu:half', 'cpu:float']
    elif type_config == 'float':
        pass
    else:
        raise ValueError("Unknown data type config is given %s" % type_config)
    return nn.Context(backends, array_classes()[0], '')
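
A short sketch of this factory in use; the 'half' branch lists cpu:half first so that fp32 kernels remain available as a fallback:

import nnabla as nn

ctx_float = context()                    # backends == ['cpu:float']
ctx_half = context(type_config='half')   # backends == ['cpu:half', 'cpu:float']
nn.set_default_context(ctx_float)
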
Example #5
def test_cuda_large_blocks(m):
    CUDA_THREAD_PER_BLOCK = 512
    CUDA_MAX_BLOCKS = 65536
    size = CUDA_MAX_BLOCKS * CUDA_THREAD_PER_BLOCK * m + 3
    print("Variable size:", size)
    x = np.zeros((size, ), np.float32)
    v = nn.Variable(x.shape)
    v.d = x
    ctx = nn.Context(backend='cuda')
    y = F.relu(v)
Example #6
def test_function_context(seed):
    rng = np.random.RandomState(313)
    xd = rng.randn(2, 3)
    x = nn.Variable.from_numpy_array(xd)
    ctx1 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='1')

    with nn.context_scope(ctx1):
        y = F.relu(x)
    ctx0 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='0')

    # TODO: use id or hash if we determine the spec
    assert str(ctx0) != str(ctx1)
    assert str(ctx1) == str(y.parent.context)

    with nn.context_scope(y.parent.context):
        z = F.relu(x)
    assert str(y.parent.context) == str(z.parent.context)
Example #7
def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w1 = nn.Variable([12, 5], need_grad=True)
    w2 = nn.Variable([12, 5], need_grad=True)
    b1 = nn.Variable([5], need_grad=True)
    b2 = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w1.d = rng.randn(*w1.shape)
    w2.d = rng.randn(*w2.shape)
    b1.d = rng.randn(*b1.shape)
    b2.d = rng.randn(*b2.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    z1 = F.affine(x, w1, b1, 1)
    z2 = F.affine(x, w2, b2, 1)
    l1 = F.softmax_cross_entropy(z1, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z2, t, 1)
    L2 = F.mean(l2)
    nn.forward_all([L1, L2])

    # Backprop for z1
    # Diffs should be initialized since they are always accumulated
    x.g = 0
    w1.g = 0
    b1.g = 0
    L1.backward(clear_buffer=True)

    inputs = [x, w1, b1]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)

    # Backprop for z2
    # Diffs should be initialized since they are always accumulated
    x.g = 0
    w2.g = 0
    b2.g = 0
    L2.backward(clear_buffer=True)

    inputs = [x, w2, b2]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)
Example #8
def solver_tester(rng,
                  solver,
                  ref_solver,
                  solver_args=[],
                  solver_kwargs={},
                  num_itr=5,
                  decay=1e-4,
                  atol=1e-6,
                  ctx=None,
                  solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])

    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert np.allclose(ref_p, p.d, atol=atol)

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
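
A hedged sketch of a caller, pairing the built-in SGD solver with a hypothetical NumPy reference (the real reference classes live in the solver test modules):

from collections import OrderedDict
import numpy as np
import nnabla.solvers as S

class RefSgd(object):
    # Hypothetical NumPy reference for vanilla SGD.

    def __init__(self, lr):
        self.lr = lr
        self.params = OrderedDict()

    def set_parameters(self, params):
        self.params.update([(k, p.d.copy()) for k, p in params.items()])

    def weight_decay(self, grads, decay):
        # grad <- grad + decay * param, matching Solver.weight_decay.
        for k, g in grads.items():
            g[...] = g + decay * self.params[k]

    def update(self, grads):
        # param <- param - lr * grad.
        for k, g in grads.items():
            self.params[k] -= self.lr * g

rng = np.random.RandomState(313)
solver_tester(rng, S.Sgd, RefSgd, solver_args=[1e-2], solver_name='Sgd')
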
Example #9
def visualize(args):
    """
    Visualizing embedded digits onto 2D space.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    batch_size = 500

    # Create default context.
    ctx = nn.Context(backend="cpu|cuda",
                     compute_backend="default|cudnn",
                     array_class="CudaArray",
                     device_id="{}".format(args.device_id))

    # Load parameters
    nn.load_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))

    # Create embedder network
    image = nn.Variable([batch_size, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=False)

    # Process all images
    features = []
    labels = []
    # Prepare MNIST data iterator

    rng = np.random.RandomState(313)
    data = data_iterator_mnist(batch_size, train=False, shuffle=True, rng=rng)
    for i in range(10000 // batch_size):
        image_data, label_data = data.next()
        image.d = image_data / 255.
        feature.forward(clear_buffer=True)
        features.append(feature.d.copy())
        labels.append(label_data.copy())
    features = np.vstack(features)
    labels = np.vstack(labels)

    # Visualize
    f = plt.figure(figsize=(16, 9))
    for i in range(10):
        c = plt.cm.Set1(i / 10.)
        plt.plot(features[labels.flat == i, 0].flatten(),
                 features[labels.flat == i, 1].flatten(),
                 '.',
                 c=c)
    plt.legend(map(str, range(10)))
    plt.grid()
    plt.savefig(os.path.join(args.monitor_path, "embed.png"))
Example #10
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
        z4 = PF.affine(z2, 5)
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            nn.forward_all([L1, L2], clear_no_need_grad=cnng)

            # for now, the first backward cannot be
            # called with clear_buffer=True
            L1.backward(clear_buffer=False)
            L2.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                import platform
                if platform.machine() == 'ppc64le':
                    pytest.skip("This test fails on ppc64le")
                assert np.all(g == g2)
Example #11
def _context(proto):
    comm = current_communicator()
    if not proto.backends:
        logger.warn('Old-style context. Updating to new format.')
        # Update from old Context
        backends = [x.strip() for x in proto.backend.split('|')]
        compute_backends = [
            x.strip() for x in proto.compute_backend.split('|')
        ]
        if 'cuda' in backends:
            device_id = str(proto.device_id)
            if comm:
                device_id = str(comm.local_rank)

            if 'cudnn' in compute_backends:
                try:
                    import nnabla_ext.cudnn
                    ctx = nnabla_ext.cudnn.context(device_id=device_id)
                except ImportError:
                    logger.warn('Fallback to CPU context.')
                    import nnabla_ext.cpu
                    ctx = nnabla_ext.cpu.context()
            elif 'default' in compute_backends:
                try:
                    import nnabla_ext.cuda
                    ctx = nnabla_ext.cuda.context(device_id=device_id)
                except ImportError:
                    logger.warn('Fallback to CPU context.')
                    import nnabla_ext.cpu
                    ctx = nnabla_ext.cpu.context()
            else:
                raise ValueError('Invalid compute_backend {}'.format(
                    proto.compute_backend))
        elif 'cpu' in backends:
            import nnabla_ext.cpu
            ctx = nnabla_ext.cpu.context()
        else:
            raise ValueError('Invalid context {}'.format(proto))
        ctx.array_class = str(proto.array_class)
        return ctx
    ctx = nn.Context()
    ctx.backend = proto.backends
    ctx.array_class = str(proto.array_class)

    if comm:
        ctx.device_id = str(comm.local_rank)
    else:
        ctx.device_id = str(proto.device_id)

    return ctx
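
A minimal sketch of the expected input, with SimpleNamespace as a hypothetical stand-in for the protobuf Context message (only the fields read above are populated):

from types import SimpleNamespace

# New-style proto: `backends` is populated, so the old-style
# fallback branch is skipped.
proto = SimpleNamespace(backends=['cpu:float'],
                        array_class='CpuCachedArray',
                        device_id=0)
ctx = _context(proto)
print(ctx.backend, ctx.array_class, ctx.device_id)
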
Example #12
def test_reshape():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_s = v2.reshape((3, 4, 2))
        v3 = F.identity(v2_s)
    v3.backward(clear_buffer=False)
    assert np.all(v2_s.g.flat == v2.g.flat)
    assert np.all(v2_s.g == 1)
    v2.d = 1
    assert np.all(v2_s.d == 1)
    v2.g = 1.5
    assert np.all(v2_s.g == 1.5)
Example #13
def test_unlinked():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_u = v2.unlinked()
        v3 = F.identity(v2_u)
    v2_u.grad.zero()
    v2_g = v2_u.g.copy()
    v3.backward(clear_buffer=False)
    assert type(v2_u) == type(v2)
    assert np.all(v.g == grad)
    assert np.all(v2_u.g == v2.g)
    assert np.all(v2_u.g == v2_g + 1)
Example #14
def list(func_name):

    sys.path.append(
        os.path.join(os.path.dirname(__file__), '..', '..', 'build-tools',
                     'code_generator'))
    from load_implements_rst import Implements

    l = [(nn.Context(), func_name)]

    info = Implements().info
    if func_name in info:
        if 'cuda' in info[func_name]:
            import nnabla_ext.cuda
            l.append((nnabla_ext.cuda.context(), func_name + 'Cuda'))
        if 'cudnn' in info[func_name]:
            import nnabla_ext.cuda.cudnn
            l.append(
                (nnabla_ext.cuda.cudnn.context(), func_name + 'CudaCudnn'))
    return l
Example #15
def ref_grad_spectral_norm(w, u, dy, du, dim, itr, eps, test, output_u, need_grad_flags):
    # We need this function when using `function_tester`
    # because the numerical gradient of `w` would not be calculated correctly
    # otherwise: in the composite implementation, some intermediate variables
    # with `need_grad == False` are connected to the input `w`.

    cpu_context = nn.Context(["cpu:float"])
    with nn.context_scope(cpu_context):
        w = nn.Variable.from_numpy_array(w)
        u = nn.Variable.from_numpy_array(u)
        w.need_grad = True
        w.grad.zero()
        w_sn = PF._spectral_norm_v1(
            w, u_init=u.data.get_data('r'), dim=dim, itr=itr, test=test)

        w_sn.forward(clear_no_need_grad=True)
        w_sn.backward(dy, clear_buffer=True)

    return w.grad.get_data('r').flatten()
Example #16
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
Example #17
def clear_no_need_grad_tester(rng,
                              func,
                              inputs,
                              func_args=[],
                              func_kwargs={},
                              backward=None,
                              atol_f=1e-6,
                              ctx=None,
                              func_name=None,
                              insert_identity=[],
                              auto_forward=False):
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]
    if True not in backward:
        return

    state_rng = None
    if rng is not None:
        state_rng = rng.get_state()
    else:
        rng = np.random.RandomState(313)

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    vinputs = create_variables(inputs, backward)
    vinputs_clear_buffer = create_variables(inputs, backward)
    vinputs_identity_clear_buffer = []
    if not insert_identity:
        insert_identity = [True] * len(vinputs)

    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        for idx, i in enumerate(vinputs_clear_buffer):
            if i is None:
                vinputs_identity_clear_buffer += [None]
            elif insert_identity[idx]:
                vinputs_identity_clear_buffer += [F.identity(i)]
            else:
                vinputs_identity_clear_buffer += [i]

    # Checking forward(clear_no_need_grad=True)
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        o = func(*(vinputs + func_args), **func_kwargs)
        o = force_tuple(o)
        F.sink(*o).forward(clear_no_need_grad=False)

        o_clear_buffer = func(*(vinputs_identity_clear_buffer + func_args),
                              **func_kwargs)
        o_clear_buffer = force_tuple(o_clear_buffer)
        o_identity_clear_buffer = list(
            map(lambda x: F.identity(x)
                if x is not None else None, o_clear_buffer))
        o_identity_clear_buffer = list(
            filter(lambda x: x is not None, o_identity_clear_buffer))

        F.sink(*o_identity_clear_buffer).forward(clear_no_need_grad=True)

    for i in range(len(o)):
        if o[i] is None:
            continue
        ref = o[i].d
        res = o_identity_clear_buffer[i].d
        assert_allclose(
            ref,
            res,
            atol=atol_f,
            err_msg="{} forward(clear_no_need_grad=True) test fails".format(
                func_name))

    vinputs = list(filter(lambda x: x is not None, vinputs))
    vinputs_clear_buffer = list(
        filter(lambda x: x is not None, vinputs_clear_buffer))

    for i in range(len(vinputs)):
        vinputs[i].grad.zero()
        vinputs_clear_buffer[i].grad.zero()

    for i in range(len(o)):
        if o[i] is None:
            continue
        o[i].g = randn(rng, *o[i].shape)
        o_identity_clear_buffer[i].g = o[i].g

    F.sink(*o).backward()
    F.sink(*o_identity_clear_buffer).backward(clear_buffer=True)

    for i in range(len(vinputs)):
        ref = vinputs[i].g
        res = vinputs_clear_buffer[i].g
        assert_allclose(
            ref,
            res,
            atol=atol_f,
            err_msg="{} forward(clear_no_need_grad=True) and backward test fails"
            .format(func_name))

    if state_rng:
        rng.set_state(state_rng)
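
A hedged example of driving this tester directly with F.add2 (it must run inside the same test-utils module, where randn and force_tuple are defined):

import numpy as np
import nnabla.functions as F

rng = np.random.RandomState(313)
inputs = [rng.randn(2, 3).astype(np.float32),
          rng.randn(2, 3).astype(np.float32)]
# Compare forward/backward results with and without clearing
# no-need-grad buffers.
clear_no_need_grad_tester(rng, F.add2, inputs, func_name='Add2')
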
Example #18
def backward_function_tester(rng,
                             func,
                             ref_func,
                             inputs,
                             func_args=[],
                             func_kwargs={},
                             atol_f=1e-6,
                             atol_b=1e-3,
                             atol_accum=1e-3,
                             dstep=1e-3,
                             backward=None,
                             ctx=None,
                             func_name=None,
                             ref_grad=None,
                             disable_half_test=False,
                             atol_half=1e-1):
    """Backward function tester

    In the forward test, it compares the results of nn.grad and `func`.backward.
    In the backward test, it compares the analytical gradients against the numerical gradients computed with `grad_outputs`.
    """
    # TODO: half

    from scipy.optimize import approx_fprime

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True if i is not None else False for i in inputs]

    # TODO: Remove set_default_context after adding ctx to BackwardFunction.
    nn.set_default_context(ctx)

    # Create Variables
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Create grad_outputs
    def create_grad_outputs(outputs):
        grad_outputs = []
        for o in outputs:
            if o.shape == ():
                go = nn.NdArray.from_numpy_array(np.array(randn(rng)))
                #go = nn.NdArray.from_numpy_array(np.array(1.0))
            else:
                go = nn.NdArray.from_numpy_array(randn(rng, *o.shape))
                #go = nn.NdArray.from_numpy_array(np.ones(o.shape))

            grad_outputs.append(go)
        return grad_outputs

    # Fill grads
    def fill_grads(vinputs, grads):
        for vi, gd in zip(vinputs, grads):
            if vi is None:
                continue
            vi.g = gd

    # Fill grads
    def zero_grads(vinputs):
        for vi in vinputs:
            if vi is None:
                continue
            vi.grad.zero()
        return

    # Gradient penalty on grads
    def gradient_penalty2(grads):
        gp2 = 0.0
        for g in grads:
            gp2 += F.sum(g**2.0)
        return gp2

    # Product sum

    def prod_sum(inputs0, inputs1):
        out = 0.0
        for inp0, inp1 in zip(inputs0, inputs1):
            out += inp0 * nn.Variable(inp1.shape).apply(data=inp1)
        return out

    # Set inputs for the numerical gradients

    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            if i.need_grad == True:
                i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    # Gradient penalty on grads used for computing numerical gradients
    def obj_func(inputs0, gp2, vinputs):
        set_inputs(inputs0, vinputs)
        gp2.forward()
        return gp2.d.copy()

    # # Half test
    # if not disable_half_test:
    #     finputs = create_variables(inputs, backward)
    #     hinputs = create_variables(inputs, backward)
    #     half_test(rng, func, finputs, hinputs, func_args,
    #               func_kwargs, backward, ctx, func_name, atol=atol_half)

    # Create input variables
    vinputs = create_variables(inputs, backward)
    # --- Forward test --- #
    # Zero grads
    zero_grads(vinputs)
    # Forward/Backward on the forward graph
    voutputs = [
        F.sigmoid(x)
        for x in force_list(func(*(vinputs + func_args), **func_kwargs))
    ]
    agrad_outputs = create_grad_outputs(voutputs)
    o = prod_sum(voutputs, agrad_outputs)
    o.forward()
    o.backward()  # clear_buffer=True)
    # Grads
    vinputs = list(filter(lambda vi: vi is not None, vinputs))
    grads = nn.grad(voutputs, vinputs, agrad_outputs)
    grads = list(filter(lambda x: x is not None, grads))
    o = F.sink(*grads)
    o.forward()
    # Check forward
    for vi, go in zip(vinputs, grads):
        if vi.need_grad is False:
            continue
        fgrads = vi.g
        bgrads = go.d
        assert_allclose(fgrads, bgrads, atol=atol_f)

    # TODO: 1. Pass function argument directly to backward functions.
    # TODO: 2. should be changed to the simpler form of directly testing BackwardFunction

    # --- Backward (accum = False) test --- #
    # Zero grads
    zero_grads(vinputs)
    # Compute analytical grads
    gp2 = gradient_penalty2(grads)
    gp2.forward()
    gp2.backward(clear_buffer=True)
    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    analytical_grads0 = analytical_grads
    # Compute numerical grads
    inputs0 = np.concatenate(
        [inp.flatten() for inp in inputs if inp is not None])
    numerical_grads = approx_fprime(inputs0, obj_func, dstep, gp2, vinputs)
    # Check backward
    assert_allclose(analytical_grads, numerical_grads, atol=atol_b)

    # --- Backward (accum = True) test --- #
    # Random grads
    rand_grads = [randn(rng, *vi.shape) for vi in vinputs]
    fill_grads(vinputs, rand_grads)
    # Compute analytical grads
    gp2.forward()
    gp2.backward(clear_buffer=True)

    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    rand_grads = np.concatenate([
        rg.flatten() if isinstance(rg, np.ndarray) else np.array(rg).reshape(
            (1, )) for rg in rand_grads
    ])
    analytical_grads -= rand_grads
    # Check backward
    assert_allclose(analytical_grads, analytical_grads0, atol=atol_accum)
Example #19
def function_tester(rng,
                    func,
                    ref_func,
                    inputs,
                    func_args=[],
                    func_kwargs={},
                    atol_f=1e-6,
                    atol_b=1e-3,
                    atol_accum=1e-6,
                    dstep=1e-3,
                    backward=None,
                    ctx=None,
                    func_name=None,
                    ref_grad=None,
                    disable_half_test=False,
                    atol_half=1e-1):
    """ Automatic testing of forward/backward pass of `func` by comparing it
    to the reference implementation in `ref_func`.

    Syntax of `ref_func`: inputs, parameters
    Syntax of `ref_grad`: inputs, output grads, parameters
    """

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    # Create Variables
    # print('create_variable')

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Half test
    if not disable_half_test:
        finputs = create_variables(inputs, backward)
        hinputs = create_variables(inputs, backward)
        half_test(rng,
                  func,
                  finputs,
                  hinputs,
                  func_args,
                  func_kwargs,
                  backward,
                  ctx,
                  func_name,
                  atol=atol_half)

    vinputs = create_variables(inputs, backward)
    # Checking forward
    # print('checking forward')
    with nn.context_scope(ctx), nn.auto_forward():
        o = func(*(vinputs + func_args), **func_kwargs)
    rinputs = copy.deepcopy(inputs)  # inputs for ref_func
    refs = ref_func(*(rinputs + func_args), **func_kwargs)

    refs = force_tuple(refs)
    o = force_tuple(o)
    assert len(o) == len(refs)
    for i, ref in enumerate(refs):
        res = o[i].d
        assert np.allclose(ref, res,
                           atol=atol_f), str(ArrayDiffStats(ref, res))

    # Checking function name
    try:
        import function_test_callback
        result = create_function_nnp(vinputs, o, func_name, func_args,
                                     func_kwargs)
        if result is not None:
            function_test_callback.callback(func_name, *result)
    except UnboundLocalError:
        pass
    except IndexError:
        pass
    except ImportError:
        pass

    # print('checking function name')
    if func_name is not None:
        assert o[0].parent.name == func_name

    # Checking backward
    # print('checking backward')
    if True not in backward:
        return

    # NNabla backward
    for v in vinputs:
        if v is None:
            continue
        if len(v.shape) == 0:
            v.g = rng.randn()
            continue
        v.g = rng.randn(*v.shape).astype(v.data.dtype)
    # Verify grad
    vinputs = create_variables(inputs, backward)
    rinputs = copy.deepcopy(inputs)
    rinputs = [
        rinput if test else None for rinput, test in zip(rinputs, backward)
    ]
    vgrads = [rng.randn(*o_.shape) for o_ in o]

    agrads, ngrads = compute_analytical_and_numerical_grad(o[0].parent,
                                                           vinputs,
                                                           o,
                                                           rinputs,
                                                           vgrads,
                                                           epsilon=dstep,
                                                           rng=rng,
                                                           ref_grad=ref_grad)
    if ref_grad is not None:
        rinputs = copy.deepcopy(inputs)
        doutputs = [o_.g for o_ in o]
        ngrads = ref_grad(*(rinputs + doutputs + func_args), **func_kwargs)

    assert np.allclose(ngrads, agrads,
                       atol=atol_b), str(ArrayDiffStats(ngrads, agrads))

    # Check if need_grad works
    for v, b in zip(vinputs, backward):
        if not b or v is None:
            continue
        v.g = 0
        v.need_grad = False
        try:
            o[0].parent.backward(
                list(filter(lambda x: x is not None, vinputs)), o)
        except RuntimeError as e:
            continue  # TODO
        assert np.all(v.g == 0)

    # test accum=False
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        v.need_grad = backward[i]

    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]

        if not backward[i]:
            continue
        f = o[0].parent

        # If input's grad is inplaced, the test doesn't work correctly.
        if f.inplace_grad(i):
            continue

        # Prepare function inputs
        finputs = list(filter(lambda x: x is not None, vinputs))

        # Save accum gradient result
        g = rng.randn(*v.shape)
        v.g = g
        f.forward(finputs, o)
        f.backward(finputs, o)
        true_g = v.g - g

        # Check accum=False
        accum = [j != i for j, vv in enumerate(vinputs) if vv is not None]
        v.g = rng.randn(*v.shape)
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert np.allclose(v.g, true_g,
                           atol=atol_accum), str(ArrayDiffStats(v.g, true_g))

        # Check accum=False with NaN gradient
        v.g = np.float32('nan')
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert not np.any(np.isnan(v.g))
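
A hedged sketch of a typical call, pairing F.relu with a NumPy reference; atol_b is loosened because ReLU is not differentiable at zero:

import numpy as np
import nnabla.functions as F

def ref_relu(x):
    # NumPy reference for the forward pass.
    return np.maximum(0.0, x)

rng = np.random.RandomState(313)
inputs = [rng.randn(2, 3, 4).astype(np.float32)]
function_tester(rng, F.relu, ref_relu, inputs, func_name='ReLU', atol_b=1e-2)
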
Example #20
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F
from nbla_test_utils import list_context

ctxs = list_context('PReLU')

if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.Context(backend='cuda'), 'PReLUCuda')]


def ref_prelu(x, w, base_axis=1):
    wshape = [1 for _ in range(x.ndim)]
    if w.size != 1:
        wshape[base_axis] = w.size
    return np.maximum(0, x) + w.reshape(wshape) * np.minimum(0, x)
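
For intuition, a quick numeric check of this reference: positive entries pass through unchanged, negative entries are scaled by the slope w.

x = np.array([[-1.0, 2.0], [3.0, -4.0]])
w = np.array([0.25])
print(ref_prelu(x, w))  # -> [[-0.25, 2.], [3., -1.]]
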


@pytest.mark.parametrize("seed", [313])
@pytest.mark.parametrize("inshape, wshape, base_axis",
                         [((2, 3, 2, 3, 2), tuple(), 4),
                          ((2, 3, 1, 3), (3, ), 1)])
@pytest.mark.parametrize("ctx, func_name", ctxs)
def test_prelu_forward_backward(seed, inshape, wshape, base_axis, ctx,
                                func_name):
Example #21
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F

ctxs = [(nn.Context(), 'ConfusionMatrix')]
if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.extensions.cuda.context(), 'ConfusionMatrixCuda')]


def ref_confusion_matrix(x, l, axis):
    orig_x = x.copy()
    x = np.rollaxis(x, axis, x.ndim).reshape(-1, x.shape[axis])
    ll = np.rollaxis(l, axis, x.ndim).flatten()
    # `int` instead of the deprecated `np.int` (removed in NumPy >= 1.24).
    y = np.zeros((orig_x.shape[axis], orig_x.shape[axis]), int)
    for x_, ll_ in zip(x, ll):
        index = -1
        for i, x__ in enumerate(x_):
            if x__ >= x_[index]:
                index = i
        y[ll_][index] += 1
    return y
Example #22
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F
from nbla_test_utils import list_context
from nnabla.testing import assert_allclose

ctxs = list_context('FusedConvolution')
cpu_context = nn.Context(["cpu:float"])


class RefFusedConvolutionGraph(object):
    def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z, base_axis,
                 pad, stride, dilation, group, channel_last, decay_rate, eps,
                 batch_stat, nonlinearity, nonlinearity_args):

        from collections import OrderedDict
        inputs = OrderedDict()
        xvar = nn.Variable.from_numpy_array(x)
        weightvar = nn.Variable.from_numpy_array(weight)
        inputs['x'] = xvar
        inputs['weight'] = weightvar
        biasvar = None
        betavar = None
Example #23
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F

ctxs = [(nn.Context(), 'BinaryError')]
if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.extensions.cuda.context(), 'BinaryErrorCuda')]


def ref_binary_error(x, l):
    y = []
    for x_, l_ in zip(x, l):
        y.append((x_ >= 0.5) != (l_ >= 0.5))
    return np.array(y).reshape(x.shape)


@pytest.mark.parametrize("ctx, func_name", ctxs)
@pytest.mark.parametrize("seed", [313])
def test_binary_error_forward(seed, ctx, func_name):
    ishape = [5, 6, 7]
Example #24
def pack_padded_sequence(padded_sequence,
                         lengths,
                         batch_first=False,
                         enforce_sorted=True):
    r"""Pack padded variable-length sequences.

    This method packs padded variable-length sequences.

    :math:`T` is the max length over the lengths of sequences.
    :math:`B` is the batch size, equal to the number of sequences.
    :math:`*` is the remaining dimensions including none.

    .. note::
      This function **must** be used in the dynamic computation mode.


    Example:

    .. code-block:: python

      import numpy as np
      import nnabla as nn
      import nnabla.functions as F
      import nnabla.utils.rnn as rnn_utils

      nn.set_auto_forward(True)

      l2v = lambda ldata: nn.Variable.from_numpy_array(np.asarray(ldata))
      a = l2v([1, 1, 1, 1])
      b = l2v([2, 2, 2])
      c = l2v([2, 2, 2])
      d = l2v([3, 3])
      e = l2v([3, 3])
      sequences = [a, b, c, d, e]
      lengths = l2v([seq.shape[0] for seq in sequences])

      padded_sequence = rnn_utils.pad_sequence(sequences)
      print(padded_sequence.d)

      packed_sequence = rnn_utils.pack_padded_sequence(padded_sequence, lengths)
      print(packed_sequence.data.d)
      print(packed_sequence.batch_sizes.d)

    Args: 
      padded_sequence (:obj:`nnabla.Variable`): Padded sequence of (:math:`T \times B \times *`)
                                                or (:math:`B \times T \times *`) shape.
      lengths (:obj:`nnabla.Variable`): Sequence length of each batch element; always resides on the CPU.
      batch_first (bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False,
                          otherwise (:math:`B`, :math:`T`, :math:`*`).
      enforce_sorted (bool): Sequences are sorted by the length in a decreasing order if True. Default is True.

    Returns: 
        :obj:`PackedSequence`
    """
    if enforce_sorted:
        sorted_indices = None
        unsorted_indices = None
    else:
        # TODO: use the CUDA context once the sort bug is fixed
        with nn.context_scope(nn.Context()):
            lengths, sorted_indices = F.sort(lengths,
                                             axis=0,
                                             reverse=True,
                                             with_index=True)

        B = sorted_indices.shape[0]
        unsorted_indices = F.scatter_nd(F.arange(0, B),
                                        sorted_indices.reshape((1, B)),
                                        shape=(B, ))
        axis = 0 if batch_first else 1
        padded_sequence = F.gather(padded_sequence, sorted_indices, axis)

    packed_sequence, batch_sizes = F.pack_padded_sequence(
        padded_sequence, lengths, batch_first)
    packed_sequence0 = PackedSequence()
    packed_sequence0.data = packed_sequence
    packed_sequence0.batch_sizes = batch_sizes
    packed_sequence0.sorted_indices = sorted_indices
    packed_sequence0.unsorted_indices = unsorted_indices

    return packed_sequence0
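
A sketch of the enforce_sorted=False path with hypothetical data (auto-forward is required, as the note above states):

import numpy as np
import nnabla as nn
import nnabla.utils.rnn as rnn_utils

nn.set_auto_forward(True)

l2v = lambda ldata: nn.Variable.from_numpy_array(np.asarray(ldata))
# Sequences deliberately not sorted by length.
sequences = [l2v([1, 1]), l2v([2, 2, 2, 2]), l2v([3, 3, 3])]
lengths = l2v([seq.shape[0] for seq in sequences])

padded_sequence = rnn_utils.pad_sequence(sequences)
packed_sequence = rnn_utils.pack_padded_sequence(padded_sequence, lengths,
                                                 enforce_sorted=False)
print(packed_sequence.data.d)
print(packed_sequence.unsorted_indices.d)
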
Example #25
def solver_tester(rng,
                  solver,
                  ref_solver,
                  solver_args=[],
                  solver_kwargs={},
                  num_itr=5,
                  decay=1e-4,
                  clip_norm=0.5,
                  atol=1e-6,
                  ctx=None,
                  solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])

    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Get params (unordered_map is used in C++, thus check in both directions)
    params_ = s.get_parameters()
    for k0, v0 in iteritems(ref_s.params):
        v1 = params_[k0]
        assert_allclose(v0, v1.d, atol=atol)
    for k1, v1 in iteritems(params_):
        v0 = ref_s.params[k1]
        assert_allclose(v0, v1.d, atol=atol)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert_allclose(ref_p, p.g, atol=atol)

    # Check clip grad by norm.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.clip_grad_by_norm(clip_norm)
    ref_s.clip_grad_by_norm(grad_copy, clip_norm)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        # update check
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert_allclose(ref_p, p.d, atol=atol)
        # iteration state increment check
        for state in s.get_states().values():
            assert state.t == (i + 1)

    # Check inf, nan, and inf/nan
    for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]], [
            lambda s: s.check_inf_grad(), lambda s: s.check_nan_grad(),
            lambda s: s.check_inf_or_nan_grad()
    ]):

        def set_value(p):
            p.g[...] = rng.choice(v + [-1, 0, 1],
                                  size=int(np.prod(p.shape)),
                                  replace=True).reshape(p.shape)
            if v[0] not in p.g:
                p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0]

        for p in params.values():
            assert method(s) == False
            g = p.g.copy()
            set_value(p)
            assert method(s) == True
            p.g[...] = g

    # Rescale grad
    scale = 10.
    ref_grad = [p.g.copy() for p in params.values()]
    for p in params.values():
        p.g *= scale
    s.scale_grad(1. / scale)
    for ref, p in zip(ref_grad, params.values()):
        assert_allclose(ref, p.g, atol=1e-4)

    # Save/Load Test
    def test_save_load(s, name):
        # Save states
        import tempfile
        tmpdir = tempfile.mkdtemp("solver-test")
        tmpfile = os.path.join(tmpdir, name)
        states0 = s.get_states()
        s.save_states(tmpfile)
        # Load states
        with nn.context_scope(ctx):
            s1 = solver(*solver_args, **solver_kwargs)
            s1.set_parameters(params)
            s1.load_states(tmpfile)
        # Check save/load states
        states1 = s1.get_states()
        for k0, s0 in iteritems(states0):
            s1 = states1[k0]
            for sname, vx0 in iteritems(s0.pstate):
                vx1 = s1.pstate[sname]
                assert_allclose(vx0.d, vx1.d)
            assert s1.t == s0.t

    test_save_load(s, "states.h5")
    test_save_load(s, "states.protobuf")

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
Example #26
def solver_tester(rng,
                  solver,
                  ref_solver,
                  solver_args=[],
                  solver_kwargs={},
                  num_itr=5,
                  decay=1e-4,
                  atol=1e-6,
                  ctx=None,
                  solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])

    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert np.allclose(ref_p, p.d, atol=atol)

    # Check inf, nan, and inf/nan
    for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]], [
            lambda s: s.check_inf_grad(), lambda s: s.check_nan_grad(),
            lambda s: s.check_inf_or_nan_grad()
    ]):

        def set_value(p):
            p.g[...] = rng.choice(v + [-1, 0, 1],
                                  size=int(np.prod(p.shape)),
                                  replace=True).reshape(p.shape)
            if v[0] not in p.g:
                p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0]

        for p in params.values():
            assert method(s) == False
            g = p.g.copy()
            set_value(p)
            assert method(s) == True
            p.g[...] = g

    # Rescale grad
    scale = 10.
    ref_grad = [p.g.copy() for p in params.values()]
    for p in params.values():
        p.g *= scale
    s.scale_grad(1. / scale)
    for ref, p in zip(ref_grad, params.values()):
        assert np.allclose(ref, p.g, atol=1e-4)

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
Example #27
def function_tester(rng,
                    func,
                    ref_func,
                    inputs,
                    func_args=[],
                    func_kwargs={},
                    atol_f=1e-6,
                    atol_b=1e-3,
                    atol_accum=1e-6,
                    dstep=1e-3,
                    backward=None,
                    ctx=None,
                    func_name=None,
                    ref_grad=None,
                    disable_half_test=False,
                    atol_half=1e-1,
                    insert_identity=[],
                    disable_clear_no_need_grad_test=False,
                    auto_forward=False):
    """ Automatic testing of forward/backward pass of `func` by comparing it
    to the reference implementation in `ref_func`.

    Syntax of `ref_func`: inputs, parameters
    Syntax of `ref_grad`: inputs, output grads, parameters
    """

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    # Create Variables
    # print('create_variable')

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Half test
    if not disable_half_test:
        finputs = create_variables(inputs, backward)
        hinputs = create_variables(inputs, backward)
        half_test(rng,
                  func,
                  finputs,
                  hinputs,
                  func_args,
                  func_kwargs,
                  backward,
                  ctx,
                  func_name,
                  atol=atol_half)

    vinputs = create_variables(inputs, backward)
    # Checking forward
    # print('checking forward')
    with nn.context_scope(ctx), nn.auto_forward():
        o = func(*(vinputs + func_args), **func_kwargs)
    rinputs = copy.deepcopy(inputs)  # inputs for ref_func
    refs = ref_func(*(rinputs + func_args), **func_kwargs)

    refs = force_tuple(refs)
    o = force_tuple(o)
    assert len(o) == len(refs)
    for i, ref in enumerate(refs):
        res = o[i].d
        assert_allclose(ref,
                        res,
                        atol=atol_f,
                        err_msg="{} forward test fails".format(func_name))

    # Checking recomputation
    vinputs = create_variables(inputs, backward)
    recomputation_test(rng, func, vinputs, func_args, func_kwargs, ctx)

    # Checking forward(clear_no_need_grad=True)
    if not disable_clear_no_need_grad_test:
        clear_no_need_grad_tester(rng, func, inputs, func_args, func_kwargs,
                                  backward, atol_f, ctx, func_name,
                                  insert_identity, auto_forward)

    # Checking function name
    try:
        import function_test_callback
        result = create_function_nnp(vinputs, o, func_name, func_args,
                                     func_kwargs)
        if result is not None:
            function_test_callback.callback(func_name, *result)
    except UnboundLocalError:
        pass
    except IndexError:
        pass
    except ImportError:
        pass

    # print('checking function name')
    if func_name is not None:
        assert o[0].parent.name == func_name

    # Checking backward
    # print('checking backward')
    if True not in backward:
        return

    # NNabla backward
    for v in vinputs:
        if v is None:
            continue
        if len(v.shape) == 0:
            v.g = randn(rng)
            continue
        v.g = randn(rng, *v.shape)
    # Verify grad
    vinputs = create_variables(inputs, backward)
    rinputs = copy.deepcopy(inputs)
    rinputs = [
        rinput if test else None for rinput, test in zip(rinputs, backward)
    ]
    vgrads = [randn(rng, *o_.shape) for o_ in o]

    def reset_ograds():
        '''
        Reset output grads every time we call backward.
        This is required because the output grads might be
        modified in-place during the backward operation.
        '''
        for ovar, g in zip(o, vgrads):
            ovar.g = g

    agrads, ngrads = compute_analytical_and_numerical_grad(o[0].parent,
                                                           vinputs,
                                                           o,
                                                           rinputs,
                                                           vgrads,
                                                           epsilon=dstep,
                                                           rng=rng,
                                                           ref_grad=ref_grad)
    if ref_grad is not None:
        rinputs = copy.deepcopy(inputs)
        doutputs = copy.deepcopy(vgrads)
        ngrads = ref_grad(*(rinputs + doutputs + func_args),
                          **func_kwargs,
                          need_grad_flags=backward)

    assert_allclose(
        ngrads,
        agrads,
        atol=atol_b,
        err_msg="{} backward w/o accumulation test fails".format(func_name))

    # Check if need_grad works
    for v, b in zip(vinputs, backward):
        if not b or v is None:
            continue
        v.grad.zero()
        v.need_grad = False
        reset_ograds()
        try:
            o[0].parent.forward(list(filter(lambda x: x is not None, vinputs)),
                                o)
            o[0].parent.backward(
                list(filter(lambda x: x is not None, vinputs)), o)
        except RuntimeError as e:
            continue  # TODO
        assert np.all(v.g == 0)

    # test accum=False
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        v.need_grad = backward[i]

    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]

        if not backward[i]:
            continue
        f = o[0].parent

        # Prepare function inputs
        finputs = list(filter(lambda x: x is not None, vinputs))

        # Save accum gradient result
        g = randn(rng, *v.shape)
        v.g = g
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o)
        true_g = v.g - g

        # Check accum=False
        accum = [j != i for j, vv in enumerate(vinputs) if vv is not None]
        v.g = randn(rng, *v.shape)
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert_allclose(
            v.g,
            true_g,
            atol=atol_accum,
            err_msg="{} backward w/ accumulation test fails.".format(
                func_name))

        # Check accum=False with NaN gradient
        v.g = np.float32('nan')
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert not np.any(np.isnan(v.g))
Example #28
def context(**kw):
    """CPU Context."""
    return nn.Context('cpu', array_classes()[0], '', 'default')
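
Usage is the same as for the typed variant in Example #4 (a brief sketch):

import nnabla as nn
import nnabla.functions as F

ctx = context()
nn.set_default_context(ctx)
x = nn.Variable([2, 3])
with nn.context_scope(ctx):
    y = F.relu(x)  # created and executed under the CPU context
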
Example #29
def backward_function_tester(rng,
                             func,
                             inputs=None,
                             func_args=[],
                             func_kwargs={},
                             atol_f=1e-4,
                             atol_b=1e-3,
                             atol_accum=5e-2,
                             dstep=1e-3,
                             backward=None,
                             backward_b=None,
                             ctx=None,
                             non_accum_check=False,
                             skip_backward_check=False,
                             insert_identity=[],
                             auto_forward=False):
    """ Automatic testing of the backward function of `func` by comparing its
    results against the backward pass of `func`. The backward pass of `func`
    is the reference; therefore, it must be tested first!

    Syntax of `ref_func`: inputs, parameters
    """

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinp = nn.Variable(i.shape, need_grad=b)
            vinp.grad.zero()  # grads always not accumulation
            vinputs += [vinp]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    vinputs = create_variables(inputs, backward)
    vinputs_for_clear_buffer = create_variables(inputs, backward)
    vinputs_for_nn_grad = create_variables(inputs, backward)

    vinputs_identity = []
    vinputs_identity_for_clear_buffer = []
    vinputs_identity_for_nn_grad = []
    if not insert_identity:
        insert_identity = [True] * len(vinputs)

    for idx, i in enumerate(
            zip(vinputs, vinputs_for_clear_buffer, vinputs_for_nn_grad)):
        with nn.auto_forward(auto_forward):
            i0, i1, i2 = i
            if i0 is None:
                vinputs_identity += [None]
                vinputs_identity_for_clear_buffer += [None]
                vinputs_identity_for_nn_grad += [None]
            elif insert_identity[idx]:
                vinputs_identity += [F.identity(i0)]
                vinputs_identity_for_clear_buffer += [F.identity(i1)]
                vinputs_identity_for_nn_grad += [F.identity(i2)]
            else:
                vinputs_identity += [i0]
                vinputs_identity_for_clear_buffer += [i1]
                vinputs_identity_for_nn_grad += [i2]

    # Forward and backward of the forward function with no buffer clear
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0 = func(*(vinputs_identity + func_args), **func_kwargs)
        outputs0 = force_list(outputs0)
        F.sink(*outputs0).forward(clear_no_need_grad=False)
    grad_voutputs = []
    for output in outputs0:
        ograd = rng.randn(*output.shape)
        grad_voutputs.append(
            nn.Variable.from_numpy_array(ograd).apply(need_grad=True))
        output.g = ograd
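    # one_input_grad=False makes the sink backward use the gradients
    # set on each output above instead of filling them with ones.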
    F.sink(*outputs0, one_input_grad=False).backward()
    vinputs = list(filter(lambda x: x is not None, vinputs))
    vinputs_identity = list(filter(lambda x: x is not None, vinputs_identity))
    vinputs_for_clear_buffer = list(
        filter(lambda x: x is not None, vinputs_for_clear_buffer))
    grad_inputs0 = [inp.g.copy() for inp in vinputs]

    # Forward and backward of the forward function with clear redundant buffer
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs_for_clear_buffer = func(
            *(vinputs_identity_for_clear_buffer + func_args), **func_kwargs)
        outputs_for_clear_buffer = force_list(outputs_for_clear_buffer)
        outputs_for_clear_buffer = list(
            map(lambda x: F.identity(x)
                if x is not None else None, outputs_for_clear_buffer))
        F.sink(*outputs_for_clear_buffer).forward(clear_no_need_grad=True)

    for o, ref_o in zip(outputs_for_clear_buffer, outputs0):
        o.g = ref_o.g

    # Check backward
    F.sink(*outputs_for_clear_buffer,
           one_input_grad=False).backward(clear_buffer=True)

    grad_inputs_for_clear_buffer = [
        inp.g.copy() for inp in vinputs_for_clear_buffer
    ]
    for grad_ref, grad_res in zip(grad_inputs0, grad_inputs_for_clear_buffer):
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref,
            grad_res,
            atol=atol_f,
            err_msg=
            "backward(clear_buffer=True) and backward(clear_buffer=False) results differ."
        )

    # Forward of the backward function
    from nnabla.backward_functions import registry
    func_name = output.parent.info.type_name
    func_backward = registry[func_name]
    grad_vinputs = grad_voutputs + vinputs
    grad_vinputs_identity = grad_voutputs + vinputs_identity
    func_info_args = output.parent.info.args
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        ograds0 = func_backward(grad_vinputs_identity, **func_info_args)
        ograds0 = force_list(ograds0)
        ograds0_ = list(filter(lambda o: o is not None, ograds0))
        F.sink(*ograds0_).forward(clear_no_need_grad=True)
    outputs1 = []
    for ograd in ograds0:
        outputs1.append(ograd.d.copy() if ograd is not None else None)

    # Check the number of returned elements
    assert len(vinputs) == len(outputs1), \
        "Length of the outputs ({}) does not match " \
        "the length of the inputs ({}) to the backward function".format(
            len(outputs1), len(vinputs))

    # Check forward
    for i, (grad_ref, grad_res) in enumerate(zip(grad_inputs0, outputs1)):
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref,
            grad_res,
            atol=atol_f,
            err_msg=
            "Forward of the backward function ({}) fails at {}-th output.".
            format(func_backward.__name__, i))

    # Check the same results between backward_function and nn.grad
    vinputs = [v for b, v in zip(backward, vinputs) if b]
    vinputs = list(filter(lambda x: x is not None, vinputs))

    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0_for_nn_grad = func(
            *(vinputs_identity_for_nn_grad + func_args), **func_kwargs)
        outputs0_for_nn_grad = force_list(outputs0_for_nn_grad)
        vinputs_identity_for_nn_grad = [
            v for b, v in zip(backward, vinputs_identity_for_nn_grad) if b
        ]
        vinputs_identity_for_nn_grad = list(
            filter(lambda x: x is not None, vinputs_identity_for_nn_grad))

        ograds1 = nn.grad(outputs0_for_nn_grad,
                          vinputs_identity_for_nn_grad,
                          grad_outputs=[g.d.copy() for g in grad_voutputs])
        F.sink(*ograds1).forward(clear_no_need_grad=True)
    ograds0 = list(filter(lambda o: o is not None, ograds0))
    ograds1 = list(filter(lambda o: o is not None, ograds1))
    for i in range(len(ograds0)):
        if ograds0[i].parent is None:
            continue
        assert_allclose(ograds0[i].d,
                        ograds1[i].d,
                        atol=atol_f,
                        err_msg="nn.grad and backward_functon results differ.")

    # Check backward
    # This handling is needed since need_grad=False is sometimes used
    # for optimization, e.g., for a mask input.
    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    def obj_func(inputs0, voutput, vinputs):
        set_inputs(inputs0, vinputs)
        voutput.forward()
        y = voutput.d.copy()
        return y

    initial_grads = []
    for grad_vinput in grad_vinputs:
        if grad_vinput is None:
            continue
        g = np.asarray(rng.randn(*grad_vinput.shape))
        initial_grads.append(g)
    grad_inputs1 = np.concatenate(
        [v.d.flatten() for v in grad_vinputs if v is not None])

    for i, ograd in enumerate(ograds0):
        # Skip when the backward is implemented as a composite of functions;
        # for some such functions the numerical gradient differs
        # substantially from the analytical one.
        if skip_backward_check:
            continue

        if ograd is None or not backward[i]:
            continue
        for ig, v in zip(initial_grads, grad_vinputs):
            v.g = ig

        # Analytical grad.
        # This must be computed first since approx_fprime destroys the input values.
        rgrad = rng.randn()
        with nn.auto_forward(auto_forward):
            sum_ograd = F.sum(ograd) * rgrad
        sum_ograd.forward(clear_no_need_grad=True)
        sum_ograd.backward()
        analytical_grads = np.concatenate(
            [v.g.flatten() for v in grad_vinputs])
        analytical_grads -= np.concatenate(
            [g.flatten() for g in initial_grads])
        # numerical grad
        from scipy.optimize import approx_fprime
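        # approx_fprime passes the extra positional args (sum_ograd,
        # grad_vinputs) through to obj_func on every evaluation.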
        numerical_grads = approx_fprime(grad_inputs1, obj_func, dstep,
                                        sum_ograd, grad_vinputs)

        # grad_vinputs: dy_1, ..., dy_n, x_1, ..., x_n
        # grad_voutputs: dy_1, ..., dy_n
        seps = [0] + np.cumsum([int(np.prod(v.shape))
                                for v in grad_vinputs]).tolist()
        ngrads = len(grad_voutputs)
        ninputs = len(grad_vinputs)
        backward_b = [True] * ninputs if backward_b is None else backward_b
        for k, sep in enumerate(zip(seps[:-1], seps[1:])):
            if (k >= ngrads and not backward[k - ngrads]) or not backward_b[k]:
                continue
            s0, s1 = sep
            analytical_grad = analytical_grads[s0:s1]
            numerical_grad = numerical_grads[s0:s1]
            assert_allclose(
                analytical_grad,
                numerical_grad,
                atol=atol_accum,
                err_msg=
                "Backward (accum) of the backward function ({}) wrt {}-th / {} input fails."
                .format(func_backward.__name__, k, ninputs))

    # Backward of some functions, such as AffineDataGrad and AffineFilterGrad,
    # is not checked for non-accum behavior anywhere else,
    # so check the non-accum backward method here.
    if non_accum_check:
        # All outputs share the same parent function.
        parent = outputs0[0].parent
        inputs = parent.inputs
        # Accum
        initial_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        accum = [True] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        accum_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        non_accum_grads0 = accum_grads - initial_grads
        # Non-accum
        accum = [False] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        non_accum_grads1 = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        # Check
        assert_allclose(
            non_accum_grads0,
            non_accum_grads1,
            atol=atol_b,
            err_msg="Backward (non-accum) of the backward function ({}) fails."
            .format(func_backward.__name__))
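A minimal usage sketch of the tester above (hedged; assumes the backward function for ReLU is registered in nnabla.backward_functions, and the shapes and arguments are illustrative):

import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(313)
inputs = [rng.randn(2, 3, 4).astype(np.float32)]
# Compare the registered ReLU backward function against the backward
# pass of F.relu itself (which must have been validated beforehand).
backward_function_tester(rng, F.relu,
                         inputs=inputs,
                         func_kwargs=dict(inplace=False),
                         ctx=nn.Context())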
Example No. 30
def list_context(func_name):
    try:
        import list_context_ext
        return list_context_ext.list(func_name)
    except Exception as e:
        print(e)
        return [(nn.Context(), func_name)]
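In test suites this helper is typically paired with pytest parametrization so that each function test runs once per available context; a hedged sketch (the test body is hypothetical):

import pytest

ctxs = list_context('ReLU')  # falls back to [(nn.Context(), 'ReLU')] without the extension

@pytest.mark.parametrize("ctx, func_name", ctxs)
def test_relu_context(ctx, func_name):
    # A real test would run function_tester or similar under ctx here.
    assert func_name == 'ReLU'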