Example #1
def test_grad_grad_resnet(seed, ctx, auto_forward, inplace, shared):
    nn.clear_parameters()

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Network
    x = nn.Variable.from_numpy_array(rng.randn(b, c, h,
                                               w)).apply(need_grad=True)
    y = SmallResNet(x, inplace=inplace, shared=shared)

    # Grad of grad
    dx = nn.grad([y], [x])
    ddx = nn.grad([dx[0]], [x])
    if not auto_forward:
        ddx[0].forward()
    # Backward of grad
    x.grad.zero()
    if not auto_forward:
        dx[0].forward()
    dx[0].backward()

    # Check between results of var.backward and nn.grad
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip(
            'CUDA Convolution N-D is only supported in CUDNN extension')
    assert_allclose(x.g, ddx[0].d, atol=1e-6)
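
Below is a minimal sketch of the same grad-of-grad pattern with the test scaffolding stripped away; it assumes a working nnabla installation, and F.sin merely stands in for SmallResNet (it is not part of the original test):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)  # eager execution, so .d is available right away
x = nn.Variable.from_numpy_array(np.random.randn(4)).apply(need_grad=True)
y = F.sin(x)                   # stand-in for SmallResNet(x)
dx = nn.grad([y], [x])         # first-order grad: cos(x) (grad_outputs default to ones)
ddx = nn.grad([dx[0]], [x])    # second-order grad: -sin(x)
print(np.allclose(ddx[0].d, -np.sin(x.d)))  # expected: True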
Example #2
def test_double_backward_floating_variables():
    x = nn.Variable((2, 2), need_grad=True)
    y = nn.Variable((2, 3), need_grad=True)
    z = nn.Variable((2, 4), need_grad=True)
    w = F.concatenate(*[x, y, z], axis=-1)
    o = F.sin(w)
    dx = nn.grad([o], [x])[0]
    ddx = nn.grad([dx], [x])[0]  # Error must not happen
Example #3
def rnn_backward(inputs,
                 num_layers=1,
                 nonlinearity='tanh',
                 dropout=None,
                 bidirectional=False,
                 training=True):
    """
    Args:
      inputs (list of nn.Variable): Incoming gradients and inputs of the corresponding forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if dropout != 0.0:
        raise ValueError("Dropout must be 0.0")

    dys = inputs[0]
    dhn = inputs[1]
    xs0 = inputs[2]
    h0 = inputs[3]
    w0 = inputs[4]

    if num_layers == 1:
        w = None
        b = inputs[5] if len(inputs) == 6 else None
    else:
        w = inputs[5]
        b = inputs[6] if len(inputs) == 7 else None
    num_directions = 2 if bidirectional else 1
    with_bias = True if b else False

    ys, hn = _create_fixed_length_rnn(xs0, h0, w0, w, b, num_layers,
                                      nonlinearity, num_directions, with_bias)
    outputs = [ys, hn]
    grad_outputs = [dys, dhn]
    if w and b:
        inputs = [xs0, h0, w0, w, b]
        dxs0, dh0, dw0, dw, db = nn.grad(outputs,
                                         inputs,
                                         grad_outputs=grad_outputs)
        return dxs0, dh0, dw0, dw, db
    if w and not b:
        inputs = [xs0, h0, w0, w]
        dxs0, dh0, dw0, dw = nn.grad(outputs,
                                     inputs,
                                     grad_outputs=grad_outputs)
        return dxs0, dh0, dw0, dw
    if not w and b:
        inputs = [xs0, h0, w0, b]
        dxs0, dh0, dw0, db = nn.grad(outputs,
                                     inputs,
                                     grad_outputs=grad_outputs)
        return dxs0, dh0, dw0, db
    if not w and not b:
        inputs = [xs0, h0, w0]
        dxs0, dh0, dw0 = nn.grad(outputs, inputs, grad_outputs=grad_outputs)
        return dxs0, dh0, dw0
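
All of the branches above boil down to the same call: nn.grad over the re-created forward graph with the incoming gradients passed as grad_outputs, i.e. a vector-Jacobian product. A minimal sketch of that pattern on a toy affine map follows; the shapes and names are illustrative only and assume nnabla is available:

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.random.randn(2, 3)).apply(need_grad=True)
w = nn.Variable.from_numpy_array(np.random.randn(3, 4)).apply(need_grad=True)
y = F.affine(x, w)                        # toy stand-in for the re-created RNN graph
dy = np.random.randn(2, 4)                # incoming gradient, like dys/dhn above
dx, dw = nn.grad([y], [x, w], grad_outputs=[dy])
print(np.allclose(dx.d, dy.dot(w.d.T), atol=1e-5))  # VJP w.r.t. x is dy @ w^T; expected: True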
Example #4
 def jacobian(self, coordinates):
     new_coordinates = self.warp_coordinates(coordinates)
     new_coordinates_x = F.slice(new_coordinates, start=(
         0, 0, 0), stop=new_coordinates.shape[:2] + (1,))
     grad_x = nn.grad([F.sum(new_coordinates_x)], [coordinates])
     new_coordinates_y = F.slice(new_coordinates, start=(
         0, 0, 1), stop=new_coordinates.shape[:2] + (2,))
     grad_y = nn.grad([F.sum(new_coordinates_y)], [coordinates])
     gx = F.reshape(grad_x[0], grad_x[0].shape[:-1] +
                    (1,) + grad_x[0].shape[-1:])
     gy = F.reshape(grad_y[0], grad_y[0].shape[:-1] +
                    (1,) + grad_y[0].shape[-1:])
     jacobian = F.concatenate(gx, gy, axis=gy.ndim-2)
     return jacobian
Example #5
def test_bool_scatter_inplace(seed, ctx, func_name, gshape, mask_shape):
    from nbla_test_utils import inplace_function_test_helper

    rng = np.random.RandomState(seed)
    gdata0 = rng.randn(*gshape).astype(np.float32)
    mask = rng.randint(0, 2, size=mask_shape)
    sdata = gdata0[mask.astype(bool)]
    gdata1 = rng.randn(*gshape).astype(np.float32)

    v_sdata = nn.Variable.from_numpy_array(sdata).apply(need_grad=True)
    v_mask = nn.Variable.from_numpy_array(mask)
    v_gdata1 = nn.Variable.from_numpy_array(gdata1).apply(need_grad=True)

    with nn.auto_forward():
        v_gdata2 = F.bool_scatter(v_sdata, v_mask, v_gdata1)

    # inplace check
    np.testing.assert_allclose(
        v_gdata2.d,
        v_gdata1.d,
        err_msg="F.bool_scatter(inplace) is not inplaced.")

    # ref check
    gdata2 = ref_bool_scatter_inplace(sdata, mask, gdata1)
    np.testing.assert_allclose(v_gdata2.d,
                               gdata2,
                               err_msg="F.bool_scatter(inplace) fails.")

    # backward wrt inplaced variable (wrt sdata is checked in not-inplaced case)
    egrad = rng.randn(*gdata2.shape)
    mask = mask if mask.shape == gdata1.shape else \
        mask.reshape(mask.shape + (1, ) * (gdata1.ndim - mask.ndim))
    ref_grad = egrad * (1 - mask)
    v_gdata1.grad.fill(0)
    v_gdata2.backward(egrad)
    np.testing.assert_allclose(
        v_gdata1.g,
        ref_grad,
        err_msg="F.bool_scatter(inplace) backward wrt inplace data fails.")

    bgrad = rng.randn(*gdata1.shape)
    v_gdata1.g = bgrad
    v_gdata2.backward(egrad)
    np.testing.assert_allclose(
        v_gdata1.g - bgrad,
        ref_grad,
        atol=1e-6,
        err_msg=
        "F.bool_scatter(inplace) backward (accum) wrt inplace data fails.")

    # nn.grad (wrt sdata is checked in not-inplaced case)
    with nn.auto_forward():
        d_gdata1 = nn.grad([v_gdata2], [v_gdata1], grad_outputs=[egrad])
    np.testing.assert_allclose(
        d_gdata1[0].d,
        ref_grad,
        atol=1e-6,
        err_msg="nn.grad (F.bool_scatter(inplace)) wrt inplace data fails.")
Example #6
def _calc_gradient_penalty(real, fake, discriminator):
    alpha = F.rand(shape=(1, 1, 1, 1))
    interpolates = alpha * real + (1.0 - alpha) * fake
    interpolates.need_grad = True

    disc_interpolates = discriminator(x=interpolates)

    grads = nn.grad([disc_interpolates], [interpolates])
    norms = [F.sum(g ** 2.0, axis=1) ** 0.5 for g in grads]
    return sum([F.mean((norm - 1.0) ** 2.0) for norm in norms])
Example #7
def test_nn_grad_propagate_down_check():
    register("IdentityForwardOnlyFunction",
             IdentityForwardOnlyFunction_backward)
    backward_func = registry["IdentityForwardOnlyFunction"]
    assert backward_func is not None

    x = nn.Variable.from_numpy_array(np.random.random((1, 1, 32, 32)))
    y = PF.convolution(x, 1, kernel=(3, 3), pad=(1, 1), with_bias=False)
    z = IdentityForwardOnlyFunction()(y)
    w = F.identity(z)

    # If IdentityForwardOnlyFunction_backward is called in nn.grad, an error will occur.
    v = nn.grad(w, [z])
    v[0].forward()
Example #8
def test_shared_leaf_variable_basic_arithmetics(seed, ctx, auto_forward):
    def add(x, derivative=0):
        if derivative == 0:
            return x + x + x
        if derivative == 1:
            return 3 * np.ones_like(x)
        if derivative == 2:
            return np.zeros_like(x)

    def sub(x, derivative=0):
        if derivative == 0:
            return x - x - x
        if derivative == 1:
            return -1 * np.ones_like(x)
        if derivative == 2:
            return np.zeros_like(x)

    def mul(x, derivative=0):
        if derivative == 0:
            return x * x * x
        if derivative == 1:
            return 3 * x**2
        if derivative == 2:
            return 6 * x

    def div(x, derivative=0):
        if derivative == 0:
            return x / x / x
        if derivative == 1:
            return -x**-2
        if derivative == 2:
            return 2 * x**-3

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)

    for math_type in [add, sub, mul, div]:
        xd = np.random.randn(2, 3) + 0.5
        x = nn.Variable.from_numpy_array(xd).apply(need_grad=True)
        x.grad.zero()
        y = math_type(x)
        # First-order gradient
        dy_dx = nn.grad([y], [x])
        if not auto_forward:
            dy_dx[0].forward()
        assert_allclose(dy_dx[0].d, math_type(xd, 1))
        # Second-order gradient
        dy_dx[0].backward()
        assert_allclose(x.g, math_type(xd, 2))
Example #9
def test_compute_simple_hessian(ctx):
    nn.clear_parameters()

    # Network
    state = nn.Variable((1, 2))
    output = PF.affine(state,
                       1,
                       w_init=I.ConstantInitializer(value=1.),
                       b_init=I.ConstantInitializer(value=1.))
    loss = F.sum(output**2)
    # Input
    state_array = np.array([[1.0, 0.5]])
    state.d = state_array

    # Grad of network
    params = nn.get_parameters().values()
    for param in params:
        param.grad.zero()
    grads = nn.grad([loss], params)
    flat_grads = F.concatenate(*[F.reshape(grad, (-1,)) for grad in grads]) if len(grads) > 1 \
        else F.reshape(grads[0], (-1,))

    # Compute hessian
    hessian = np.zeros((flat_grads.shape[0], flat_grads.shape[0]),
                       dtype=np.float32)
    for i in range(flat_grads.shape[0]):
        flat_grads_i = flat_grads[i]
        flat_grads_i.forward()
        for param in params:
            param.grad.zero()
        flat_grads_i.backward()
        num_index = 0
        for param in params:
            grad = param.g.flatten()  # grad of grad so this is hessian
            hessian[i, num_index:num_index + len(grad)] = grad
            num_index += len(grad)

    actual = hessian
    expected = np.array([[
        2 * state_array[0, 0]**2, 2 * state_array[0, 0] * state_array[0, 1],
        2 * state_array[0, 0]
    ],
                         [
                             2 * state_array[0, 0] * state_array[0, 1],
                             2 * state_array[0, 1]**2, 2 * state_array[0, 1]
                         ], [2 * state_array[0, 0], 2 * state_array[0, 1],
                             2.]])
    assert_allclose(actual, expected)
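
The row-by-row loop above materializes the full Hessian. The same grad-of-grad machinery can also be used for Hessian-vector products without the loop, which is the pattern the influence-function examples further below rely on. A minimal sketch under the same toy setup (the variable names here are illustrative):

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.initializer as I

nn.clear_parameters()
nn.set_auto_forward(True)
state = nn.Variable.from_numpy_array(np.array([[1.0, 0.5]], dtype=np.float32))
output = PF.affine(state, 1,
                   w_init=I.ConstantInitializer(value=1.),
                   b_init=I.ConstantInitializer(value=1.))
loss = F.sum(output**2)
params = list(nn.get_parameters().values())
grads = nn.grad([loss], params)                        # dL/dw, dL/db
v = [nn.Variable.from_numpy_array(np.random.randn(*p.shape).astype(np.float32))
     for p in params]
gv = sum([F.sum(g * vi) for g, vi in zip(grads, v)])   # <grad, v>
hvp = nn.grad([gv], params)                            # one H @ v block per parameter
print([h.d.shape for h in hvp])                        # [(2, 1), (1,)]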
Example #10
def test_grad_outputs(seed, ctx, auto_forward, type_grad_outputs):
    from nbla_test_utils import ArrayDiffStats

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    x = nn.Variable.from_numpy_array(rng.randn(b, c, h,
                                               w)).apply(need_grad=True)
    y = F.sigmoid(x)

    # Grad outputs
    if type_grad_outputs == int:
        g = rng.randint(-10, 10)
    elif type_grad_outputs == float:
        g = rng.randn()
    elif type_grad_outputs == np.ndarray:
        g = rng.randn(*y.shape)
    elif type_grad_outputs == nn.NdArray:
        g = nn.NdArray.from_numpy_array(rng.randn(*y.shape))

    # Zerograd, Forward, Backward on the forward graph
    inputs = [x]
    [inp.grad.fill(0) for inp in inputs]
    if not auto_forward:
        y.forward()
    y.backward(g)

    # Grad
    inputs = [x]
    outputs = [y]
    grad_outputs = [g]
    grads = nn.grad(outputs, inputs, grad_outputs)
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d,
                           atol=1e-6), str(ArrayDiffStats(inp.g, grad.d))
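
As a quick reference for the int/float case exercised above, a scalar grad_outputs simply scales the gradient. A minimal sketch assuming nnabla is available (the constant 2.5 is arbitrary):

import numpy as np
import nnabla as nn
import nnabla.functions as F

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.random.randn(2, 3)).apply(need_grad=True)
y = F.sigmoid(x)
g = 2.5                                         # scalar grad_output
dx = nn.grad([y], [x], grad_outputs=[g])[0]
print(np.allclose(dx.d, g * y.d * (1 - y.d), atol=1e-6))  # sigmoid' scaled by g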
Example #11
def test_multiple_objectives(seed, ctx, auto_forward):
    from nbla_test_utils import ArrayDiffStats

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Objective 0
    x0 = nn.Variable.from_numpy_array(rng.randn(b, c, h,
                                                w)).apply(need_grad=True)
    y0 = F.sigmoid(x0)
    # Objective 1
    x1 = nn.Variable.from_numpy_array(rng.randn(b, c, h,
                                                w)).apply(need_grad=True)
    y1 = F.tanh(x1)

    # Zerograd, Forward, Backward on the forward graph
    g0 = nn.NdArray.from_numpy_array(rng.randn(*x0.shape))
    g1 = nn.NdArray.from_numpy_array(rng.randn(*x1.shape))
    z = y0 * nn.Variable(g0.shape).apply(data=g0) + y1 * \
        nn.Variable(g1.shape).apply(data=g1)
    inputs = [x0, x1]
    [inp.grad.fill(0) for inp in inputs]
    if not auto_forward:
        z.forward()
    z.backward()

    # Grad
    inputs = [x0, x1]
    outputs = [y0, y1]
    grad_outputs = [g0, g1]
    grads = nn.grad(outputs, inputs, grad_outputs)
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d,
                           atol=1e-6), str(ArrayDiffStats(inp.g, grad.d))
Example #12
def gen_path_regularize(fake_img,
                        latents,
                        mean_path_length,
                        decay=0.01,
                        pl_weight=2.0):

    noise = F.randn(shape=fake_img.shape) / \
                    np.sqrt(fake_img.shape[2]*fake_img.shape[3])

    gradient = nn.grad([F.sum(fake_img * noise)], [latents])[0]
    path_lengths = F.mean(F.sum(F.pow_scalar(gradient, 2), axis=1), axis=0)
    path_lengths = F.pow_scalar(path_lengths, 0.5)

    path_mean = mean_path_length + decay * \
        (F.mean(path_lengths) - mean_path_length)

    path_penalty = F.mean(
        F.pow_scalar(path_lengths - F.reshape(path_mean, (1, ), inplace=False),
                     1))
    return path_penalty * pl_weight, path_mean, path_lengths
Example #13
def test_dropout_grad_dependency(p, seed, ctx, func_name):
    from nnabla._dropout_workaround import _get_dropout_mask
    # Test whether the memory clearance by grad_depends_on_inputs/outputs does
    # something bad during graph execution, such as clearing values that are
    # planned to be used. This test is performed by changing the
    # inputs/outputs of Dropout to intermediate variables in the same manner as
    # nbla_test_utils.py.
    atol_f = 1e-4

    with nn.context_scope(ctx):
        rng = np.random.RandomState(seed)
        init_x = rng.randn(2, 3, 4).astype(np.float32) * 2
        init_dy_for_grad = rng.randn(*init_x.shape).astype(init_x.dtype)
        init_dx = rng.randn(*init_x.shape).astype(init_x.dtype)
        init_for_dx2 = rng.randn(*init_x.shape).astype(init_x.dtype)

        # Graph construction
        x = nn.Variable.from_numpy_array(init_x).apply(need_grad=True)
        x_interm = F.identity(x)
        y_interm = F.dropout(x_interm, p, seed)
        y = F.identity(y_interm)
        dx_interm = nn.grad(y, x, grad_outputs=[init_dy_for_grad])[0]
        dx = F.identity(dx_interm)
        y_dx = y + dx  # replaceable with F.sink(y, dx, one_input_grad=False)

        # Execution
        x.g = init_dx  # Accumulation
        y_dx.forward(clear_no_need_grad=True)
        mask = _get_dropout_mask(x_interm).d  # Store mask before the clear
        y_dx.backward(init_for_dx2, clear_buffer=True)

        # Reference
        ref_dx = ref_dropout_double_backward(init_for_dx2, mask, p) + init_dx

        # Test
        assert_allclose(x.g,
                        ref_dx,
                        atol=atol_f,
                        err_msg="Wrong output values of double backward of "
                        "Dropout by nn.grad.")
Example #14
def test_resnet_expansion(seed, ctx, auto_forward, flag_grad_outputs):
    from nbla_test_utils import ArrayDiffStats
    nn.clear_parameters()

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Network
    x = nn.Variable.from_numpy_array(rng.randn(b, c, h, w))
    y = nn.Variable.from_numpy_array(rng.randint(0, n_cls, b).reshape(b, 1))
    p = SmallResNet(x)
    loss = F.mean(F.softmax_cross_entropy(p, y))

    # Zerograd, Forward, Backward on the forward graph
    inputs = nn.get_parameters().values()
    [inp.grad.fill(0) for inp in inputs]
    grad = nn.NdArray.from_numpy_array(np.asarray(
        rng.randn())) if flag_grad_outputs else 1
    if not auto_forward:
        loss.forward()
    loss.backward(grad)

    # Grad
    grad_outputs = grad if flag_grad_outputs else None
    grads = nn.grad([loss], inputs, [grad_outputs])
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip(
            'CUDA Convolution N-D is only supported in CUDNN extension')
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d,
                           atol=1e-6), str(ArrayDiffStats(inp.g, grad.d))
Example #15
def test_dropout_double_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, backward_function_tester

    rng = np.random.RandomState(seed)
    inpd = cap_ignore_region(
        rng.randn(2, 3, 4).astype(np.float32) * 2,
        (-1e-3, 1e-3))  # Ensure there is no zero.
    inp = nn.Variable.from_numpy_array(inpd).apply(need_grad=True)
    # ONLY test the double backward
    with nn.context_scope(ctx):
        dout = F.dropout(inp, p, seed)
        out = F.sigmoid(dout)

    # Check gradient w.r.t. dy only since no backward w.r.t. x
    grads = nn.grad([out], [inp])
    grad = grads[0]
    grad.forward()
    grad.backward(1.0, clear_buffer=False)
    g_dy = grad.parent.inputs[1].g
    scale = 1. / (1. - p)
    mask = dout.d != 0
    assert np.allclose(g_dy, mask * scale)
Example #16
def backward_function_tester(rng,
                             func,
                             inputs=None,
                             func_args=[],
                             func_kwargs={},
                             atol_f=1e-4,
                             atol_b=1e-3,
                             atol_accum=5e-2,
                             dstep=1e-3,
                             backward=None,
                             backward_b=None,
                             ctx=None,
                             non_accum_check=False,
                             skip_backward_check=False,
                             insert_identity=[],
                             auto_forward=False):
    """ Automatic testing of backward function and backward pass of `func` by comparing it.
    The backward pass of `func` is the reference; therefore, 
    the backward pass of `func` must be tested first!

    Syntax of `ref_func`: inputs, parameters
    """

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinp = nn.Variable(i.shape, need_grad=b)
            vinp.grad.zero()  # zero-initialize grads so they never accumulate
            vinputs += [vinp]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    vinputs = create_variables(inputs, backward)
    vinputs_for_clear_buffer = create_variables(inputs, backward)
    vinputs_for_nn_grad = create_variables(inputs, backward)

    vinputs_identity = []
    vinputs_identity_for_clear_buffer = []
    vinputs_identity_for_nn_grad = []
    if not insert_identity:
        insert_identity = [True] * len(vinputs)

    for idx, i in enumerate(
            zip(vinputs, vinputs_for_clear_buffer, vinputs_for_nn_grad)):
        with nn.auto_forward(auto_forward):
            i0, i1, i2 = i
            if i0 is None:
                vinputs_identity += [None]
                vinputs_identity_for_clear_buffer += [None]
                vinputs_identity_for_nn_grad += [None]
            elif insert_identity[idx]:
                vinputs_identity += [F.identity(i0)]
                vinputs_identity_for_clear_buffer += [F.identity(i1)]
                vinputs_identity_for_nn_grad += [F.identity(i2)]
            else:
                vinputs_identity += [i0]
                vinputs_identity_for_clear_buffer += [i1]
                vinputs_identity_for_nn_grad += [i2]

    # Forward and backward of the forward function with no buffer clear
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0 = func(*(vinputs_identity + func_args), **func_kwargs)
        outputs0 = force_list(outputs0)
        F.sink(*outputs0).forward(clear_no_need_grad=False)
    grad_voutputs = []
    for output in outputs0:
        ograd = rng.randn(*output.shape)
        grad_voutputs.append(
            nn.Variable.from_numpy_array(ograd).apply(need_grad=True))
        output.g = ograd
    F.sink(*outputs0, one_input_grad=False).backward()
    vinputs = list(filter(lambda x: x is not None, vinputs))
    vinputs_identity = list(filter(lambda x: x is not None, vinputs_identity))
    vinputs_for_clear_buffer = list(
        filter(lambda x: x is not None, vinputs_for_clear_buffer))
    grad_inputs0 = [inp.g.copy() for inp in vinputs]

    # Forward and backward of the forward function with clear redundant buffer
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs_for_clear_buffer = func(
            *(vinputs_identity_for_clear_buffer + func_args), **func_kwargs)
        outputs_for_clear_buffer = force_list(outputs_for_clear_buffer)
        outputs_for_clear_buffer = list(
            map(lambda x: F.identity(x)
                if x is not None else None, outputs_for_clear_buffer))
        F.sink(*outputs_for_clear_buffer).forward(clear_no_need_grad=True)

    for o, ref_o in zip(outputs_for_clear_buffer, outputs0):
        o.g = ref_o.g

    # Check backward
    F.sink(*outputs_for_clear_buffer,
           one_input_grad=False).backward(clear_buffer=True)

    grad_inputs_for_clear_buffer = [
        inp.g.copy() for inp in vinputs_for_clear_buffer
    ]
    for grad_ref, grad_res in zip(grad_inputs0, grad_inputs_for_clear_buffer):
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref,
            grad_res,
            atol=atol_f,
            err_msg=
            "backward(clear_buffer=True) and backward(clear_buffer=False) results differ."
        )

    # Forward of the backward function
    from nnabla.backward_functions import registry
    func_name = output.parent.info.type_name
    func_backward = registry[func_name]
    grad_vinputs = grad_voutputs + vinputs
    grad_vinputs_identity = grad_voutputs + vinputs_identity
    func_info_args = output.parent.info.args
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        ograds0 = func_backward(grad_vinputs_identity, **func_info_args)
        ograds0 = force_list(ograds0)
        ograds0_ = list(filter(lambda o: o is not None, ograds0))
        F.sink(*ograds0_).forward(clear_no_need_grad=True)
    outputs1 = []
    for i, ograd in enumerate(ograds0):
        outputs1.append(ograd.d.copy() if ograd is not None else None)

    # Check num of returned elements
    assert_allclose(
        len(vinputs),
        len(outputs1),
        err_msg="Length of the outputs ({}) does not match "
        "the length of the inputs ({}) to the backward function".format(
            len(outputs1), len(vinputs)))

    # Check forward
    for i, elm in enumerate(zip(grad_inputs0, outputs1)):
        grad_ref, grad_res = elm
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref,
            grad_res,
            atol=atol_f,
            err_msg=
            "Forward of the backward function ({}) fails at {}-th output.".
            format(func_backward.__name__, i))

    # Check the same results between backward_function and nn.grad
    vinputs = [v for b, v in zip(backward, vinputs) if b]
    vinputs = list(filter(lambda x: x is not None, vinputs))

    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0_for_nn_grad = func(
            *(vinputs_identity_for_nn_grad + func_args), **func_kwargs)
        outputs0_for_nn_grad = force_list(outputs0_for_nn_grad)
        vinputs_identity_for_nn_grad = [
            v for b, v in zip(backward, vinputs_identity_for_nn_grad) if b
        ]
        vinputs_identity_for_nn_grad = list(
            filter(lambda x: x is not None, vinputs_identity_for_nn_grad))

        ograds1 = nn.grad(outputs0_for_nn_grad,
                          vinputs_identity_for_nn_grad,
                          grad_outputs=[g.d.copy() for g in grad_voutputs])
        F.sink(*ograds1).forward(clear_no_need_grad=True)
    ograds0 = list(filter(lambda o: o is not None, ograds0))
    ograds1 = list(filter(lambda o: o is not None, ograds1))
    for i in range(len(ograds0)):
        if ograds0[i].parent is None:
            continue
        assert_allclose(ograds0[i].d,
                        ograds1[i].d,
                        atol=atol_f,
                        err_msg="nn.grad and backward_functon results differ.")

    # Check backward
    # needed since we sometimes do need_grad=False for optimization, e.g., mask.
    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    def obj_func(inputs0, voutput, vinputs):
        set_inputs(inputs0, vinputs)
        voutput.forward()
        y = voutput.d.copy()
        return y

    initial_grads = []
    for grad_vinput in grad_vinputs:
        if grad_vinput is None:
            continue
        g = np.asarray(rng.randn(*grad_vinput.shape))
        initial_grads.append(g)
    grad_inputs1 = np.concatenate(
        [v.d.flatten() for v in grad_vinputs if v is not None])

    for i, ograd in enumerate(ograds0):
        # We can skip this check if the backward is a composite of functions.
        # For a composite backward, the numerical gradient can be quite
        # different from the analytical one for some functions.
        if skip_backward_check:
            continue

        if ograd is None or not backward[i]:
            continue
        for ig, v in zip(initial_grads, grad_vinputs):
            v.g = ig

        # Analytical grad. This must be computed first since approx_fprime
        # destroys the input values.
        rgrad = rng.randn()
        with nn.auto_forward(auto_forward):
            sum_ograd = F.sum(ograd) * rgrad
        sum_ograd.forward(clear_no_need_grad=True)
        sum_ograd.backward()
        analytical_grads = np.concatenate(
            [v.g.flatten() for v in grad_vinputs])
        analytical_grads -= np.concatenate(
            [g.flatten() for g in initial_grads])
        # numerical grad
        from scipy.optimize import approx_fprime
        numerical_grads = approx_fprime(grad_inputs1, obj_func, dstep,
                                        sum_ograd, grad_vinputs)

        # grad_vinputs: dy_1, ..., dy_n, x_1, ..., x_n
        # grad_voutputs: dy_1, ..., dy_n
        seps = [0] + np.cumsum([int(np.prod(v.shape))
                                for v in grad_vinputs]).tolist()
        ngrads = len(grad_voutputs)
        ninputs = len(grad_vinputs)
        backward_b = [True] * ninputs if backward_b is None else backward_b
        for k, sep in enumerate(zip(seps[:-1], seps[1:])):
            if k >= ngrads and not backward[k - ngrads] or not backward_b[k]:
                continue
            s0, s1 = sep
            analytical_grad = analytical_grads[s0:s1]
            numerical_grad = numerical_grads[s0:s1]
            assert_allclose(
                analytical_grad,
                numerical_grad,
                atol=atol_accum,
                err_msg=
                "Backward (accum) of the backward function ({}) wrt {}-th / {} input fails."
                .format(func_backward.__name__, k, ninputs))

    # The backward of some functions, e.g., AffineDataGrad and AffineFilterGrad,
    # is not checked for non-accum anywhere else, so check the non-accum
    # backward method here.
    if non_accum_check:
        # for any outputs, parents are the same function.
        parent = outputs0[0].parent
        inputs = parent.inputs
        # Accum
        initial_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        accum = [True] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        accum_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        non_accum_grads0 = accum_grads - initial_grads
        # Non-accum
        accum = [False] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        non_accum_grads1 = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        # Check
        assert_allclose(
            non_accum_grads0,
            non_accum_grads1,
            atol=atol_b,
            err_msg="Backward (non-accum) of the backward function ({}) fails."
            .format(func_backward.__name__))
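
A hypothetical invocation of the tester above, not a verbatim test from the suite: it checks the backward function registered for Sigmoid against the backward pass of F.sigmoid itself, assuming backward_function_tester is importable from the test utilities. The rng seed and input shape are arbitrary:

import numpy as np
import nnabla.functions as F

rng = np.random.RandomState(313)
backward_function_tester(rng, F.sigmoid,
                         inputs=[rng.randn(2, 3).astype(np.float32)],
                         atol_f=1e-4)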
Example #17
def main(args):
    from network import implicit_network

    # Setting
    # nn.set_auto_forward(True)
    ctx = get_extension_context('cudnn', device_id=args.device_id)
    nn.set_default_context(ctx)
    D = args.depth
    L = args.layers
    W = args.width
    H = args.height
    R = H * W
    z_orientation = 1

    # Camera parameters
    camera = Camera(image_width=W, image_height=H, z_orientation=z_orientation)
    camloc = np.array([0.75, 0.5, 1])
    camloc = (camloc / np.sum(camloc**2)**0.5) * 2
    to = np.array([0, 0, 0])
    Rt_inv = look_at(camloc, to, z_orientation=z_orientation)
    R_inv = Rt_inv[:3, :3]
    fov = 90
    K_inv = camera.compute_intrinsic_inv(fov)

    # Rays
    x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy")
    xy = np.asarray([x.flatten(), y.flatten()])
    xy1 = np.concatenate([xy, np.ones(R)[np.newaxis, :]])
    raydir = R_inv.dot(K_inv.dot(xy1))
    raydir = raydir / np.sum(raydir**2, axis=0)**0.5
    raydir = raydir.transpose((1, 0))

    # Network
    camloc = nn.Variable.from_numpy_array(camloc[np.newaxis, ...])
    raydir = nn.Variable.from_numpy_array(raydir[np.newaxis, ...])
    sdf_net = partial(implicit_network,
                      D=D,
                      L=L,
                      initial_sphere_radius=args.initial_sphere_radius)
    sdf_net0 = sdf_net

    def sdf_net0(x):
        out = sdf_net(x)
        sdf = out[..., 0][..., np.newaxis]
        return sdf

    # Sphere trace
    t_near = args.t_near
    t_far = args.t_far
    sphere_trace_itr = args.sphere_trace_itr
    ray_march_points = args.ray_march_points
    n_chunks = args.n_chunks
    max_post_itr = args.max_post_itr
    post_method = args.post_method
    eps = args.eps
    st = time.time()
    x_hit, mask_hit, dists, _, _ = ray_trace(sdf_net0,
                                             camloc,
                                             raydir,
                                             test=True,
                                             t_near=t_near,
                                             t_far=t_far,
                                             sphere_trace_itr=sphere_trace_itr,
                                             ray_march_points=ray_march_points,
                                             n_chunks=n_chunks,
                                             max_post_itr=max_post_itr,
                                             post_method=post_method,
                                             eps=eps)

    x_hit.need_grad = False
    dists.need_grad = False
    mask_hit.need_grad = False

    x_curr = x_hit
    F.sink(*[x_curr, mask_hit]).forward(clear_buffer=False)
    # Lighting
    x_curr = x_curr.get_unlinked_variable(need_grad=True)
    sdf = sdf_net0(x_curr)
    normal = nn.grad([sdf], [x_curr])[0]
    normal = F.norm_normalization(normal, axes=normal.ndim - 1, eps=1e-24)
    dlight = DistantLight()
    cos = lambert(normal, dlight.direction.reshape([3, 1])).reshape((1, H, W))
    mask_hit = mask_hit.get_unlinked_variable(need_grad=False)
    mask_hit = F.reshape(mask_hit, (1, H, W))
    mask_hit = F.broadcast(mask_hit, (3, H, W))
    image = mask_hit * 255.0 * cos
    image.forward(clear_buffer=True)

    cv2.imwrite(
        f"sphere_{W}x{H}_sti{sphere_trace_itr:03d}_mpi{max_post_itr:03d}_{args.post_method}.png",
        image.d.transpose(1, 2, 0))
    print(
        f"Bidirectional sphere trace/ray march (W={W}, H={H}): {time.time() - st} [s]"
    )
Example #18
def backward_function_tester(rng,
                             func,
                             ref_func,
                             inputs,
                             func_args=[],
                             func_kwargs={},
                             atol_f=1e-6,
                             atol_b=1e-3,
                             atol_accum=1e-3,
                             dstep=1e-3,
                             backward=None,
                             ctx=None,
                             func_name=None,
                             ref_grad=None,
                             disable_half_test=False,
                             atol_half=1e-1):
    """Backward function tester

    In the forward test, it compares the results of nn.grad and `func`.backward.
    In the backward test, it compares the analytical gradients and numerical gradient with `grad_outputs`.
    """
    # TODO: half

    from scipy.optimize import approx_fprime

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True if i is not None else False for i in inputs]

    # TODO: Remove set_default_context after adding ctx to BackwardFunction.
    nn.set_default_context(ctx)

    # Create Variables
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Create grad_outputs
    def create_grad_outputs(outputs):
        grad_outputs = []
        for o in outputs:
            if o.shape == ():
                go = nn.NdArray.from_numpy_array(np.array(randn(rng)))
                #go = nn.NdArray.from_numpy_array(np.array(1.0))
            else:
                go = nn.NdArray.from_numpy_array(randn(rng, *o.shape))
                #go = nn.NdArray.from_numpy_array(np.ones(o.shape))

            grad_outputs.append(go)
        return grad_outputs

    # Fill grads
    def fill_grads(vinputs, grads):
        for vi, gd in zip(vinputs, grads):
            if vi is None:
                continue
            vi.g = gd

    # Zero grads
    def zero_grads(vinputs):
        for vi in vinputs:
            if vi is None:
                continue
            vi.grad.zero()
        return

    # Gradient penalty on grads
    def gradient_penalty2(grads):
        gp2 = 0.0
        for g in grads:
            gp2 += F.sum(g**2.0)
        return gp2

    # Product sum

    def prod_sum(inputs0, inputs1):
        out = 0.0
        for inp0, inp1 in zip(inputs0, inputs1):
            out += inp0 * nn.Variable(inp1.shape).apply(data=inp1)
        return out

    # Set inputs for the numerical gradients

    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            if i.need_grad == True:
                i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    # Gradient penalty on grads used for computing numerical gradients
    def obj_func(inputs0, gp2, vinputs):
        set_inputs(inputs0, vinputs)
        gp2.forward()
        return gp2.d.copy()

    # # Half test
    # if not disable_half_test:
    #     finputs = create_variables(inputs, backward)
    #     hinputs = create_variables(inputs, backward)
    #     half_test(rng, func, finputs, hinputs, func_args,
    #               func_kwargs, backward, ctx, func_name, atol=atol_half)

    # Create input variables
    vinputs = create_variables(inputs, backward)
    # --- Forward test --- #
    # Zero grads
    zero_grads(vinputs)
    # Forward/Backward on the forward graph
    voutputs = [
        F.sigmoid(x)
        for x in force_list(func(*(vinputs + func_args), **func_kwargs))
    ]
    agrad_outputs = create_grad_outputs(voutputs)
    o = prod_sum(voutputs, agrad_outputs)
    o.forward()
    o.backward()  # clear_buffer=True)
    # Grads
    voutputs = voutputs
    vinputs = list(filter(lambda vi: vi is not None, vinputs))
    agrad_outputs = agrad_outputs
    grads = nn.grad(voutputs, vinputs, agrad_outputs)
    grads = list(filter(lambda x: x is not None, grads))
    o = F.sink(*grads)
    o.forward()
    # Check forward
    for vi, go in zip(vinputs, grads):
        if vi.need_grad is False:
            continue
        fgrads = vi.g
        bgrads = go.d
        assert_allclose(fgrads, bgrads, atol=atol_f)

    # TODO: 1. Pass function argument directly to backward functions.
    # TODO: 2. Should be changed to a simpler form by simply testing BackwardFunction.

    # --- Backward (accum = False) test --- #
    # Zero grads
    zero_grads(vinputs)
    # Compute analytical grads
    gp2 = gradient_penalty2(grads)
    gp2.forward()
    gp2.backward(clear_buffer=True)
    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    analytical_grads0 = analytical_grads
    # Compute numerical grads
    inputs0 = np.concatenate(
        [inp.flatten() for inp in inputs if inp is not None])
    numerical_grads = approx_fprime(inputs0, obj_func, dstep, gp2, vinputs)
    # Check backward
    assert_allclose(analytical_grads, numerical_grads, atol=atol_b)

    # --- Backward (accum = True) test --- #
    # Random grads
    rand_grads = [randn(rng, *vi.shape) for vi in vinputs]
    fill_grads(vinputs, rand_grads)
    # Compute analytical grads
    gp2.forward()
    gp2.backward(clear_buffer=True)

    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    rand_grads = np.concatenate([
        rg.flatten() if isinstance(rg, np.ndarray) else np.array(rg).reshape(
            (1, )) for rg in rand_grads
    ])
    analytical_grads -= rand_grads
    # Check backward
    assert_allclose(analytical_grads, analytical_grads0, atol=atol_accum)
Example #19
    x_fake = generator(z, test=False)
    print(x_fake)

    # Prob for fake sample
    print("# Prob for fake sample")
    p_fake = discriminator(x_fake)
    print(p_fake)

    # Prob for real sample
    p_real = discriminator(x_real)

    # WGAN loss
    print("# WGAN loss")
    loss_gen = gan_loss(p_fake)
    print(loss_gen)
    loss_dis = gan_loss(p_fake, p_real)
    print(loss_dis)

    # Gradient penalty
    print("# Gradient penalty")
    x_rmix = eps * x_real + (1.0 - eps) * x_fake
    p_rmix = discriminator(x_rmix)
    grads = nn.grad([p_rmix], [x_rmix])
    print(grads)
    l2norms = [F.sum(g**2.0, [1, 2, 3])**0.5 for g in grads]
    gp = sum([F.mean((l - 1.0)**2.0) for l in l2norms])

    loss_dis += gp
    gp.forward()
    gp.backward()
Example #20
def infl_icml(model_info_dict, file_dir_dict, use_all_params, need_evaluate,
              alpha):
    num_epochs = 2
    # params
    lr = 0.005
    seed = model_info_dict['seed']
    net_func = model_info_dict['net_func']
    batch_size = model_info_dict['batch_size']
    test_batch_size = 1000
    target_epoch = model_info_dict['num_epochs']
    # files and dirs
    save_dir = file_dir_dict['save_dir']
    infl_filename = file_dir_dict['infl_filename']
    final_model_name = file_dir_dict['model_filename']
    final_model_path = os.path.join(save_dir, 'epoch%02d' % (target_epoch - 1),
                                    'weights', final_model_name)
    input_dir_name = os.path.dirname(file_dir_dict['train_csv'])

    # setup
    trainset, valset, image_shape, n_classes, ntr, nval = init_dataset(
        file_dir_dict['train_csv'], file_dir_dict['val_csv'], seed)
    n_channels, _h, _w = image_shape
    resize_size = get_image_size((_h, _w))
    idx_train = get_indices(ntr, seed)
    idx_val = get_indices(nval, seed)

    nn.load_parameters(final_model_path)
    trained_params = nn.get_parameters(grad_only=False)

    test = True

    grad_model = functools.partial(setup_model,
                                   net_func=net_func,
                                   n_classes=n_classes,
                                   n_channels=n_channels,
                                   resize_size=resize_size,
                                   test=test,
                                   reduction='mean')
    solver = S.Momentum(lr=lr, momentum=0.9)
    solver.set_parameters(trained_params)
    # gradient
    u = compute_gradient(grad_model, solver, valset, test_batch_size, idx_val,
                         resize_size)

    # Hinv * u with SGD
    seed_train = 0
    v = dict()
    for key, param in nn.get_parameters(grad_only=False).items():
        v[key] = nn.Variable(param.d.shape, need_grad=True)
        v[key].d = 0
        v[key].g = 0

    solver.set_parameters(v)

    loss_train = []
    loss_fn = None
    for epoch in range(num_epochs):
        # training
        seed_train = 0
        np.random.seed(epoch)
        idx = get_batch_indices(ntr, batch_size, seed=epoch)
        for j, i in enumerate(idx):
            seeds = list(range(seed_train, seed_train + i.size))
            seed_train += i.size
            X, y = get_batch_data(trainset,
                                  idx_train,
                                  i,
                                  resize_size,
                                  test=False,
                                  seeds=seeds)
            _, loss_fn, input_image = adjust_batch_size(
                grad_model, len(X), loss_fn)
            input_image["image"].d = X
            input_image["label"].d = y
            loss_fn.forward()

            grad_params = nn.grad(loss_fn, [
                param for param in nn.get_parameters(grad_only=False).values()
            ])
            vg = 0
            for vv, g in zip(v.values(), grad_params):
                vg += F.sum(vv * g)

            for parameters in trained_params.values():
                parameters.grad.zero()

            vgrad_params = nn.grad(vg, [
                param for param in nn.get_parameters(grad_only=False).values()
            ])
            loss_i = 0
            for vgp, vv, uu in zip(vgrad_params, v.values(), u.values()):
                loss_i += 0.5 * F.sum(vgp * vv + alpha * vv * vv) - F.sum(
                    uu * vv)
            loss_i.forward()

            solver.zero_grad()
            loss_i.backward(clear_buffer=True)
            solver.update()
            loss_train.append(loss_i.d.copy())

    # influence
    infl_dict = dict()
    infl = np.zeros(ntr)
    for i in tqdm(range(ntr), desc='calc influence (3/3 steps)'):
        csv_idx = idx_train[i]
        file_name = trainset.get_filepath_to_data(csv_idx)
        file_name = os.path.join(input_dir_name, file_name)
        file_name = os.path.normpath(file_name)
        X, y = get_data(trainset, idx_train[i], resize_size, True, seed=i)
        _, loss_fn, input_image = adjust_batch_size(grad_model, len(X),
                                                    loss_fn)
        input_image["image"].d = X
        input_image["label"].d = y
        loss_fn.forward()
        for parameters in trained_params.values():
            parameters.grad.zero()
        loss_fn.backward(clear_buffer=True)
        infl_i = 0
        for j, param in enumerate(nn.get_parameters(grad_only=False).values()):
            infl_i += (param.g.copy() * list(v.values())[j].d.copy()).sum()
        infl[i] = -infl_i / ntr
        infl_dict[csv_idx] = [file_name, y, infl[i]]
    infl_list = [val + [key] for key, val in infl_dict.items()]
    infl_list = sorted(infl_list, key=lambda x: (x[-2]))

    # save
    header = ['x:image', 'y:label', 'influence', 'datasource_index']
    data_type = 'object,int,float,int'
    if need_evaluate:
        save_infl_for_analysis(infl_list, use_all_params, save_dir,
                               infl_filename, epoch, header, data_type)
    save_to_csv(filename=infl_filename,
                header=header,
                list_to_save=infl_list,
                data_type=data_type)
Example #21
def infl_sgd(model_info_dict, file_dir_dict, use_all_params, need_evaluate):
    # params
    lr = model_info_dict['lr']
    seed = model_info_dict['seed']
    net_func = model_info_dict['net_func']
    batch_size = model_info_dict['batch_size']
    end_epoch = model_info_dict['end_epoch']
    target_epoch = model_info_dict['num_epochs']
    # files and dirs
    save_dir = file_dir_dict['save_dir']
    info_filename = file_dir_dict['info_filename']
    infl_filename = file_dir_dict['infl_filename']
    final_model_name = file_dir_dict['model_filename']
    final_model_path = os.path.join(save_dir, 'epoch%02d' % (target_epoch - 1),
                                    'weights', final_model_name)
    input_dir_name = os.path.dirname(file_dir_dict['train_csv'])

    # setup
    trainset, valset, image_shape, n_classes, ntr, nval = init_dataset(
        file_dir_dict['train_csv'], file_dir_dict['val_csv'], seed)
    n_channels, _h, _w = image_shape
    resize_size = get_image_size((_h, _w))
    idx_train = get_indices(ntr, seed)
    idx_val = get_indices(nval, seed)

    nn.load_parameters(final_model_path)
    trained_params = nn.get_parameters(grad_only=False)
    test = True

    grad_model = functools.partial(setup_model,
                                   net_func=net_func,
                                   n_classes=n_classes,
                                   n_channels=n_channels,
                                   resize_size=resize_size,
                                   test=test,
                                   reduction='sum')

    solver = S.Sgd(lr=lr)
    solver.set_parameters(trained_params)
    # gradient
    u = compute_gradient(grad_model, solver, valset, batch_size, idx_val,
                         target_epoch, resize_size)

    test = False
    infl_model = functools.partial(setup_model,
                                   net_func=net_func,
                                   n_classes=n_classes,
                                   n_channels=n_channels,
                                   resize_size=resize_size,
                                   test=test)
    # influence
    infl_dict = {}
    info = np.load(os.path.join(save_dir, info_filename), allow_pickle=True)
    loss_fn = None
    for epoch in tqdm(range(target_epoch - 1, end_epoch - 1, -1),
                      desc='calc influence (3/3 steps)'):
        for step_info in info[epoch][::-1]:
            idx, seeds, lr, step = step_info['idx'], step_info[
                'seeds'], step_info['lr'], step_info['step']
            fn = select_modelfile_for_infl(use_all_params, final_model_path,
                                           save_dir, epoch, step)
            _, loss_fn, input_image = adjust_batch_size(
                infl_model, solver, 1, loss_fn)
            nn.load_parameters(fn)
            params = nn.get_parameters(grad_only=False)
            solver = S.Sgd(lr=lr)
            solver.set_parameters(params)
            X = []
            y = []
            for i, seed in zip(idx, seeds):
                i = int(i)
                image, label = get_data(trainset,
                                        idx_train[i],
                                        resize_size,
                                        test,
                                        seed=seed)
                X.append(image)
                y.append(label)
                input_image["image"].d = image
                input_image["label"].d = label
                loss_fn.forward()
                solver.zero_grad()
                loss_fn.backward(clear_buffer=True)

                csv_idx = idx_train[i]
                infl = infl_dict.get(csv_idx, [0.0])[-1]
                for j, (key, param) in enumerate(
                        nn.get_parameters(grad_only=False).items()):
                    infl += lr * (u[key].d * param.g).sum() / idx.size

                # store infl
                file_name = trainset.get_filepath_to_data(csv_idx)
                file_name = os.path.join(input_dir_name, file_name)
                file_name = os.path.normpath(file_name)
                infl_dict[csv_idx] = [file_name, label, infl]

            # update u
            _, loss_fn, input_image = adjust_batch_size(
                infl_model, solver, len(idx), loss_fn)
            input_image["image"].d = X
            input_image["label"].d = np.array(y).reshape(-1, 1)
            loss_fn.forward()
            params = nn.get_parameters(grad_only=False)
            grad_params = {}
            for key, p in zip(params.keys(), nn.grad([loss_fn],
                                                     params.values())):
                grad_params[key] = p
            ug = 0
            # compute H[t]u[t]
            for key, uu in u.items():
                try:
                    ug += F.sum(uu * grad_params[key])
                except TypeError:
                    # cannot compute grad w.r.t. batch normalization running mean and var
                    pass
            ug.forward()
            solver.zero_grad()
            ug.backward(clear_buffer=True)

            for j, (key, param) in enumerate(
                    nn.get_parameters(grad_only=False).items()):
                u[key].d -= lr * param.g / idx.size

        # sort by influence score
        infl_list = [val + [key] for key, val in infl_dict.items()]
        infl_list = sorted(infl_list, key=lambda x: (x[-2]))

        # save
        header = ['x:image', 'y:label', 'influence', 'datasource_index']
        data_type = 'object,int,float,int'
        if need_evaluate:
            save_infl_for_analysis(infl_list, use_all_params, save_dir,
                                   infl_filename, epoch, header, data_type)
    save_to_csv(filename=infl_filename,
                header=header,
                list_to_save=infl_list,
                data_type=data_type)
Example #22
def sdf_feature_grad(implicit_network, x, conf):
    y = implicit_network(x, initial_sphere_radius=conf.initial_sphere_radius)
    sdf = y[..., 0:1]
    feature = y[..., 1:]
    grad = nn.grad([sdf], [x])[0]
    return sdf, feature, grad
Example #23
def train(args):
    # Context
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Args
    latent = args.latent
    maps = args.maps
    batch_size = args.batch_size
    image_size = args.image_size
    lambda_ = args.lambda_

    # Model
    # generator loss
    z = nn.Variable([batch_size, latent])
    x_fake = generator(z, maps=maps, up=args.up).apply(persistent=True)
    p_fake = discriminator(x_fake, maps=maps)
    loss_gen = gan_loss(p_fake).apply(persistent=True)
    # discriminator loss
    p_fake = discriminator(x_fake, maps=maps)
    x_real = nn.Variable([batch_size, 3, image_size, image_size])
    p_real = discriminator(x_real, maps=maps)
    loss_dis = gan_loss(p_fake, p_real).apply(persistent=True)
    # gradient penalty
    eps = F.rand(shape=[batch_size, 1, 1, 1])
    x_rmix = eps * x_real + (1.0 - eps) * x_fake
    p_rmix = discriminator(x_rmix, maps=maps)
    x_rmix.need_grad = True  # Enabling gradient computation for double backward
    grads = nn.grad([p_rmix], [x_rmix])
    l2norms = [F.sum(g**2.0, [1, 2, 3])**0.5 for g in grads]
    gp = sum([F.mean((l - 1.0)**2.0) for l in l2norms])
    loss_dis += lambda_ * gp
    # generator with fixed value for test
    z_test = nn.Variable.from_numpy_array(np.random.randn(batch_size, latent))
    x_test = generator(z_test, maps=maps, test=True,
                       up=args.up).apply(persistent=True)

    # Solver
    solver_gen = S.Adam(args.lrg, args.beta1, args.beta2)
    solver_dis = S.Adam(args.lrd, args.beta1, args.beta2)

    with nn.parameter_scope("generator"):
        params_gen = nn.get_parameters()
        solver_gen.set_parameters(params_gen)
    with nn.parameter_scope("discriminator"):
        params_dis = nn.get_parameters()
        solver_dis.set_parameters(params_dis)

    # Monitor
    monitor = Monitor(args.monitor_path)
    monitor_loss_gen = MonitorSeries("Generator Loss", monitor, interval=10)
    monitor_loss_cri = MonitorSeries("Negative Critic Loss",
                                     monitor,
                                     interval=10)
    monitor_time = MonitorTimeElapsed("Training Time", monitor, interval=10)
    monitor_image_tile_train = MonitorImageTile("Image Tile Train",
                                                monitor,
                                                num_images=batch_size,
                                                interval=1,
                                                normalize_method=denormalize)
    monitor_image_tile_test = MonitorImageTile("Image Tile Test",
                                               monitor,
                                               num_images=batch_size,
                                               interval=1,
                                               normalize_method=denormalize)

    # Data Iterator
    di = data_iterator_cifar10(batch_size, True)

    # Train loop
    for i in range(args.max_iter):
        # Train discriminator
        x_fake.need_grad = False  # no need backward to generator
        for _ in range(args.n_critic):
            solver_dis.zero_grad()
            x_real.d = di.next()[0] / 127.5 - 1.0
            z.d = np.random.randn(batch_size, latent)
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()

        # Train generator
        x_fake.need_grad = True  # need backward to generator
        solver_gen.zero_grad()
        z.d = np.random.randn(batch_size, latent)
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.update()
        # Monitor
        monitor_loss_gen.add(i, loss_gen.d)
        monitor_loss_cri.add(i, -loss_dis.d)
        monitor_time.add(i)

        # Save
        if i % args.save_interval == 0:
            monitor_image_tile_train.add(i, x_fake)
            monitor_image_tile_test.add(i, x_test)
            nn.save_parameters(
                os.path.join(args.monitor_path, "params_{}.h5".format(i)))

    # Last
    x_test.forward(clear_buffer=True)
    nn.save_parameters(
        os.path.join(args.monitor_path, "params_{}.h5".format(i)))
    monitor_image_tile_train.add(i, x_fake)
    monitor_image_tile_test.add(i, x_test)
Example #24
def test_dropout_double_backward(p, seed, ctx, func_name):
    from nnabla.backward_functions import registry
    from nnabla._dropout_workaround import _get_dropout_mask
    # dropout_backward depends on Dropout. The dependency must be kept by
    # the execution order.
    # 1. Dropout::forward                  (A mask of dropout is calculated.)
    # 2. The forward of dropout_backward   (The mask is used.)
    # 3. The backward of dropout_backward  (The mask is used.)
    # 4. Dropout::backward                 (The mask is used, and then cleared.)
    # This order must be kept when using nnabla.grad. In the current
    # implementation, GradEndFunction keeps this order.
    atol_f = 1e-4

    with nn.context_scope(ctx):
        rng = np.random.RandomState(seed)
        init_x = rng.randn(2, 3, 4).astype(np.float32) * 2
        init_dy = rng.randn(*init_x.shape).astype(init_x.dtype)
        init_dy_for_grad = rng.randn(*init_x.shape).astype(init_x.dtype)
        init_dx = rng.randn(*init_x.shape).astype(init_x.dtype)
        init_for_dx2 = rng.randn(*init_x.shape).astype(init_x.dtype)

        #
        # A. Test mask passing
        #
        # Skip p=0 because, in that case, dropout does not happen and the mask
        # does not change the results.
        if p != 0:
            with pytest.raises(RuntimeError):
                x = nn.Variable.from_numpy_array(init_x).apply(need_grad=True)
                dy = nn.Variable.from_numpy_array(init_dy).apply(
                    need_grad=True)
                # y = F.dropout(x, p, seed)  # Dropout is required to compute mask.
                dx = registry['Dropout']([dy, x], p, seed)

            # Note: y.forward() is required before dx.forward(). However, that
            #       check is skipped here because the random results of
            #       dx.forward() with and without y.forward() can coincidentally
            #       match, so the test would not be reproducible.

        #
        # B. Unit test of dropout_backward
        #
        # Graph construction
        x = nn.Variable.from_numpy_array(init_x).apply(need_grad=True)
        dy = nn.Variable.from_numpy_array(init_dy).apply(need_grad=True)
        y = F.dropout(x, p, seed)  # Dropout is required to compute mask.
        dx = registry['Dropout']([dy, x], p, seed)

        # Execution
        y.forward()  # Dropout is required to compute mask.
        # (y != 0) cannot be used as the mask when x itself contains 0.
        mask = _get_dropout_mask(x).d
        dx.forward()
        # Note: dropout_backward is a composite function, so dx.parent is just
        #       a composing function such as MulScalar. Unit tests calling
        #       dx.parent.forward and dx.parent.backward are meaningless, and
        #       for the same reason a test of accumulation is pointless.

        # Reference
        ref_dx = ref_dropout_backward(init_dy, mask, p)

        # Test
        assert_allclose(dx.d,
                        ref_dx,
                        atol=atol_f,
                        err_msg="Wrong output values of dropout_backward.")

        #
        # C. Test the forward of dropout_backward by using nnabla.grad
        #
        # Graph construction
        x = nn.Variable.from_numpy_array(init_x).apply(need_grad=True)
        y = F.dropout(x, p, seed)
        dx = nn.grad(y, x, grad_outputs=[init_dy_for_grad])[0]
        # Note: In NNabla 1.22.0, if grad_outputs=X is given a bare np.ndarray
        #       X, nn.grad splits X into smaller arrays via self._force_list.
        #       For example, X = np.array([[5, 6], [7, 8]]) is split into
        #       [np.array([5, 6]), np.array([7, 8])]. The Mul2 inserted by
        #       nn.grad then uses np.array([5, 6]) as dy and broadcasts it to
        #       np.array([[5, 6], [5, 6]]). The forward execution finishes, but
        #       the resulting values are wrong; hence grad_outputs is passed as
        #       a list here.

        # Execution
        dx.forward(clear_buffer=True)

        # Reference
        mask = _get_dropout_mask(x).d
        ref_dx = ref_dropout_backward(init_dy_for_grad, mask, p)

        # Test
        assert_allclose(dx.d,
                        ref_dx,
                        atol=atol_f,
                        err_msg="Wrong output values of Dropout of nn.grad.")

        #
        # D. Test the backward of dropout_backward by using nnabla.grad
        #
        # The numerical gradient via scipy.approx_fprime cannot be used because
        # Dropout is stochastic and changes its results across repeated forward
        # computations.

        # Graph construction
        x = nn.Variable.from_numpy_array(init_x).apply(need_grad=True)
        y = F.dropout(x, p, seed)
        dx = nn.grad(y, x, grad_outputs=[init_dy_for_grad])[0]
        y_dx = y + dx  # replaceable with F.sink(y, dx, one_input_grad=False)

        # Execution
        x.g = init_dx  # Accumulation
        y_dx.forward(clear_no_need_grad=True)
        mask = _get_dropout_mask(x).d  # Store mask before the clear
        y_dx.backward(init_for_dx2, clear_buffer=True)

        # Reference
        ref_dx = ref_dropout_double_backward(init_for_dx2, mask, p) + init_dx

        # Test
        assert_allclose(x.g,
                        ref_dx,
                        atol=atol_f,
                        err_msg="Wrong output values of double backward of "
                        "Dropout by nn.grad.")
Example no. 25
def inner_train_test(inputa, inputb, labela, labelb, data_generator,
                     meta_training, args):
    lossesa, lossesb, accuraciesa, accuraciesb = [], [], [], []
    if meta_training:
        num_updates = args.num_updates
        update_lr = args.train_update_lr
    else:
        num_updates = args.test_num_updates
        update_lr = args.update_lr

    # Training
    for inp in data_generator.next():
        inputa.d, inputb.d, labela.d, labelb.d = inp

        # Initialize network
        with nn.parameter_scope('meta'):
            resulta = net(inputa, labela, True, args)
            resultb = net(inputb, labelb, True, args)
            fast_weights = nn.get_parameters()

        # For saving training accuracies
        resulta[0].persistent = True
        resulta[1].persistent = True
        task_lossa_var = [
            resulta[0],
        ]
        task_accuracya_var = [
            resulta[1],
        ]

        # Inner loop
        for j in range(num_updates):
            grad_list = nn.grad(resulta[0], fast_weights.values())
            for ind, key in enumerate(fast_weights.keys()):
                if grad_list[ind] is None:
                    continue
                if args.first_order or not meta_training:
                    grad_list[ind].need_grad = False
                fast_weights[key] = fast_weights[key] - \
                    update_lr * grad_list[ind]

            resulta = net(inputa, labela, True, args, fast_weights)
            resulta[0].persistent = True
            resulta[1].persistent = True
            task_lossa_var.append(resulta[0])
            task_accuracya_var.append(resulta[1])

        # The loss on queries is calculated only at the end of the inner loop.
        # Following the original implementation, we always use batch statistics
        # for batch normalization, even in the test phase.
        resultb = net(inputb, labelb, True, args, fast_weights)

        # Forward calculation
        result_all = F.sink(resulta[0], resulta[1], resultb[0], resultb[1])
        result_all.forward()

        if meta_training:
            # Backward calculation
            lossb = resultb[0] / data_generator.batch_size
            # Gradients on the weights are automatically accumulated.
            lossb.backward()

        task_lossa = []
        task_accuracya = []
        for j in range(num_updates + 1):
            task_accuracya_var[j].forward()
            task_lossa.append(task_lossa_var[j].d)
            task_accuracya.append(task_accuracya_var[j].d)

        lossesa.append(task_lossa)
        lossesb.append(resultb[0].d)
        accuraciesa.append(task_accuracya)
        accuraciesb.append(resultb[1].d)

    return lossesa, lossesb, accuraciesa, accuraciesb
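
The heart of the inner loop above is rebuilding the fast weights as graph Variables so that the meta (outer) gradient can flow through them. The sketch below isolates that single update step on a toy affine regressor; the scope name, the inner learning rate, and the data are placeholders, and the subsequent rebuild of the network with the updated weights (the net(..., fast_weights) call above) is omitted.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

x = nn.Variable.from_numpy_array(np.random.randn(8, 4).astype(np.float32))
t = nn.Variable.from_numpy_array(np.random.randn(8, 1).astype(np.float32))

with nn.parameter_scope('toy_maml'):        # placeholder scope name
    y = PF.affine(x, 1)
    fast_weights = nn.get_parameters()
loss = F.mean(F.squared_error(y, t))

inner_lr = 0.01                             # placeholder inner-loop step size
grad_list = nn.grad([loss], list(fast_weights.values()))
for (key, w), g in zip(list(fast_weights.items()), grad_list):
    if g is None:
        continue                            # parameter unused by the loss
    fast_weights[key] = w - inner_lr * g    # update stays inside the graph

# fast_weights now maps names to Variables that depend on the original
# parameters, so a query loss built from them supports second-order gradients.
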
Example no. 26
def train(args):
    if args.c_dim != len(args.selected_attrs):
        print("c_dim must be the same as the num of selected attributes. Modified c_dim.")
        args.c_dim = len(args.selected_attrs)

    # Dump the config information.
    config = dict()
    print("Used config:")
    for k in args.__dir__():
        if not k.startswith("_"):
            config[k] = getattr(args, k)
            print("'{}' : {}".format(k, getattr(args, k)))

    # Prepare Generator and Discriminator based on user config.
    generator = functools.partial(
        model.generator, conv_dim=args.g_conv_dim, c_dim=args.c_dim, num_downsample=args.num_downsample, num_upsample=args.num_upsample, repeat_num=args.g_repeat_num)
    discriminator = functools.partial(model.discriminator, image_size=args.image_size,
                                      conv_dim=args.d_conv_dim, c_dim=args.c_dim, repeat_num=args.d_repeat_num)

    x_real = nn.Variable(
        [args.batch_size, 3, args.image_size, args.image_size])
    label_org = nn.Variable([args.batch_size, args.c_dim, 1, 1])
    label_trg = nn.Variable([args.batch_size, args.c_dim, 1, 1])

    with nn.parameter_scope("dis"):
        dis_real_img, dis_real_cls = discriminator(x_real)

    with nn.parameter_scope("gen"):
        x_fake = generator(x_real, label_trg)
    x_fake.persistent = True  # to retain its value during computation.

    # Get an unlinked variable of x_fake so that the discriminator loss does
    # not backpropagate into the generator.
    x_fake_unlinked = x_fake.get_unlinked_variable()

    with nn.parameter_scope("dis"):
        dis_fake_img, dis_fake_cls = discriminator(x_fake_unlinked)

    # ---------------- Define Loss for Discriminator -----------------
    d_loss_real = (-1) * loss.gan_loss(dis_real_img)
    d_loss_fake = loss.gan_loss(dis_fake_img)
    d_loss_cls = loss.classification_loss(dis_real_cls, label_org)
    d_loss_cls.persistent = True

    # Gradient Penalty.
    alpha = F.rand(shape=(args.batch_size, 1, 1, 1))
    x_hat = F.mul2(alpha, x_real) + \
        F.mul2(F.r_sub_scalar(alpha, 1), x_fake_unlinked)

    with nn.parameter_scope("dis"):
        dis_for_gp, _ = discriminator(x_hat)
    grads = nn.grad([dis_for_gp], [x_hat])

    l2norm = F.sum(grads[0] ** 2.0, axis=(1, 2, 3)) ** 0.5
    d_loss_gp = F.mean((l2norm - 1.0) ** 2.0)

    # total discriminator loss.
    d_loss = d_loss_real + d_loss_fake + args.lambda_cls * \
        d_loss_cls + args.lambda_gp * d_loss_gp

    # ---------------- Define Loss for Generator -----------------
    g_loss_fake = (-1) * loss.gan_loss(dis_fake_img)
    g_loss_cls = loss.classification_loss(dis_fake_cls, label_trg)
    g_loss_cls.persistent = True

    # Reconstruct Images.
    with nn.parameter_scope("gen"):
        x_recon = generator(x_fake_unlinked, label_org)
    x_recon.persistent = True

    g_loss_rec = loss.recon_loss(x_real, x_recon)
    g_loss_rec.persistent = True

    # total generator loss.
    g_loss = g_loss_fake + args.lambda_rec * \
        g_loss_rec + args.lambda_cls * g_loss_cls

    # -------------------- Solver Setup ---------------------
    d_lr = args.d_lr  # initial learning rate for Discriminator
    g_lr = args.g_lr  # initial learning rate for Generator
    solver_dis = S.Adam(alpha=args.d_lr, beta1=args.beta1, beta2=args.beta2)
    solver_gen = S.Adam(alpha=args.g_lr, beta1=args.beta1, beta2=args.beta2)

    # register parameters to each solver.
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())

    # -------------------- Create Monitors --------------------
    monitor = Monitor(args.monitor_path)
    monitor_d_cls_loss = MonitorSeries(
        'real_classification_loss', monitor, args.log_step)
    monitor_g_cls_loss = MonitorSeries(
        'fake_classification_loss', monitor, args.log_step)
    monitor_loss_dis = MonitorSeries(
        'discriminator_loss', monitor, args.log_step)
    monitor_recon_loss = MonitorSeries(
        'reconstruction_loss', monitor, args.log_step)
    monitor_loss_gen = MonitorSeries('generator_loss', monitor, args.log_step)
    monitor_time = MonitorTimeElapsed("Training_time", monitor, args.log_step)

    # -------------------- Prepare / Split Dataset --------------------
    using_attr = args.selected_attrs
    dataset, attr2idx, idx2attr = get_data_dict(args.attr_path, using_attr)
    random.seed(313)  # use fixed seed.
    random.shuffle(dataset)  # shuffle dataset.
    test_dataset = dataset[-2000:]  # extract 2000 images for test

    if args.num_data:
        # Use training data partially.
        training_dataset = dataset[:min(args.num_data, len(dataset) - 2000)]
    else:
        training_dataset = dataset[:-2000]
    print("Use {} images for training.".format(len(training_dataset)))

    # create data iterators.
    load_func = functools.partial(stargan_load_func, dataset=training_dataset,
                                  image_dir=args.celeba_image_dir, image_size=args.image_size, crop_size=args.celeba_crop_size)
    data_iterator = data_iterator_simple(load_func, len(
        training_dataset), args.batch_size, with_file_cache=False, with_memory_cache=False)

    load_func_test = functools.partial(stargan_load_func, dataset=test_dataset,
                                       image_dir=args.celeba_image_dir, image_size=args.image_size, crop_size=args.celeba_crop_size)
    test_data_iterator = data_iterator_simple(load_func_test, len(
        test_dataset), args.batch_size, with_file_cache=False, with_memory_cache=False)

    # Keep fixed test images for intermediate translation visualization.
    test_real_ndarray, test_label_ndarray = test_data_iterator.next()
    test_label_ndarray = test_label_ndarray.reshape(
        test_label_ndarray.shape + (1, 1))

    # -------------------- Training Loop --------------------
    one_epoch = data_iterator.size // args.batch_size
    num_max_iter = args.max_epoch * one_epoch

    for i in range(num_max_iter):
        # Get real images and labels.
        real_ndarray, label_ndarray = data_iterator.next()
        label_ndarray = label_ndarray.reshape(label_ndarray.shape + (1, 1))
        label_ndarray = label_ndarray.astype(float)
        x_real.d, label_org.d = real_ndarray, label_ndarray

        # Generate target domain labels randomly.
        rand_idx = np.random.permutation(label_org.shape[0])
        label_trg.d = label_ndarray[rand_idx]

        # ---------------- Train Discriminator -----------------
        # generate fake image.
        x_fake.forward(clear_no_need_grad=True)
        d_loss.forward(clear_no_need_grad=True)
        solver_dis.zero_grad()
        d_loss.backward(clear_buffer=True)
        solver_dis.update()

        monitor_loss_dis.add(i, d_loss.d.item())
        monitor_d_cls_loss.add(i, d_loss_cls.d.item())
        monitor_time.add(i)

        # -------------- Train Generator --------------
        if (i + 1) % args.n_critic == 0:
            g_loss.forward(clear_no_need_grad=True)
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            x_fake_unlinked.grad.zero()
            g_loss.backward(clear_buffer=True)
            # grad=None reuses the gradient already stored on x_fake and
            # propagates it back through the generator.
            x_fake.backward(grad=None)
            solver_gen.update()
            monitor_loss_gen.add(i, g_loss.d.item())
            monitor_g_cls_loss.add(i, g_loss_cls.d.item())
            monitor_recon_loss.add(i, g_loss_rec.d.item())
            monitor_time.add(i)

            if (i + 1) % args.sample_step == 0:
                # save image.
                save_results(i, args, x_real, x_fake,
                             label_org, label_trg, x_recon)
                if args.test_during_training:
                    # translate images from test dataset.
                    x_real.d, label_org.d = test_real_ndarray, test_label_ndarray
                    label_trg.d = test_label_ndarray[rand_idx]
                    x_fake.forward(clear_no_need_grad=True)
                    save_results(i, args, x_real, x_fake, label_org,
                                 label_trg, None, is_training=False)

        # Learning rates get decayed
        if (i + 1) > int(0.5 * num_max_iter) and (i + 1) % args.lr_update_step == 0:
            g_lr = max(0, g_lr - (args.lr_update_step *
                                  args.g_lr / float(0.5 * num_max_iter)))
            d_lr = max(0, d_lr - (args.lr_update_step *
                                  args.d_lr / float(0.5 * num_max_iter)))
            solver_gen.set_learning_rate(g_lr)
            solver_dis.set_learning_rate(d_lr)
            print('learning rates decayed, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr))

    # Save parameters and training config.
    param_name = 'trained_params_{}.h5'.format(
        datetime.datetime.today().strftime("%m%d%H%M"))
    param_path = os.path.join(args.model_save_path, param_name)
    nn.save_parameters(param_path)
    config["pretrained_params"] = param_name

    with open(os.path.join(args.model_save_path, "training_conf_{}.json".format(datetime.datetime.today().strftime("%m%d%H%M"))), "w") as f:
        json.dump(config, f)

    # -------------------- Translation on test dataset --------------------
    for i in range(args.num_test):
        real_ndarray, label_ndarray = test_data_iterator.next()
        label_ndarray = label_ndarray.reshape(label_ndarray.shape + (1, 1))
        label_ndarray = label_ndarray.astype(float)
        x_real.d, label_org.d = real_ndarray, label_ndarray

        rand_idx = np.random.permutation(label_org.shape[0])
        label_trg.d = label_ndarray[rand_idx]

        x_fake.forward(clear_no_need_grad=True)
        save_results(i, args, x_real, x_fake, label_org,
                     label_trg, None, is_training=False)
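
The only use of nn.grad in the training script above is the WGAN-GP gradient penalty. The sketch below strips that block down to a self-contained example with a toy affine critic; the batch size, image shape, and scope name are placeholders.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

b = 4                                       # placeholder batch size
x_real = nn.Variable.from_numpy_array(
    np.random.randn(b, 3, 8, 8).astype(np.float32)).apply(need_grad=True)
x_fake = nn.Variable.from_numpy_array(
    np.random.randn(b, 3, 8, 8).astype(np.float32)).apply(need_grad=True)

# Random interpolation between real and fake samples.
alpha = F.rand(shape=(b, 1, 1, 1))
x_hat = F.mul2(alpha, x_real) + F.mul2(F.r_sub_scalar(alpha, 1), x_fake)

with nn.parameter_scope('toy_critic'):      # stand-in for the discriminator
    d_out = PF.affine(x_hat, 1)

# Penalize deviations of the gradient norm from 1 (WGAN-GP).
grads = nn.grad([d_out], [x_hat])
l2norm = F.sum(grads[0] ** 2.0, axis=(1, 2, 3)) ** 0.5
d_loss_gp = F.mean((l2norm - 1.0) ** 2.0)
d_loss_gp.forward()
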
Example no. 27
def disc_r1_loss(real_disc_out, real_img):
    # R1 regularization: the squared L2 norm of the gradient of the
    # discriminator output with respect to the real images, averaged
    # over the batch.
    gradient = nn.grad([F.sum(real_disc_out)], [real_img])[0]
    gradient_penalty = F.pow_scalar(gradient, 2)
    gradient_penalty = F.reshape(gradient_penalty, (gradient.shape[0], -1))
    return F.mean(F.sum(gradient_penalty, axis=1))
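
As a usage note, disc_r1_loss is typically added to the adversarial term weighted by gamma / 2, following the R1 regularizer of Mescheder et al. The sketch below assumes the function above is in scope and substitutes a toy affine discriminator; the shapes, the adversarial term, and the value of gamma are placeholders.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

real_img = nn.Variable.from_numpy_array(
    np.random.randn(4, 3, 8, 8).astype(np.float32)).apply(need_grad=True)
with nn.parameter_scope('toy_disc'):        # stand-in for the discriminator
    real_disc_out = PF.affine(real_img, 1)

r1_gamma = 10.0                             # placeholder regularization weight
d_adv_loss = (-1) * F.mean(real_disc_out)   # placeholder adversarial term
d_loss = d_adv_loss + (r1_gamma / 2.0) * disc_r1_loss(real_disc_out, real_img)
d_loss.forward()
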