Code example #1
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Computation
        if prop_down[0]:
            if accum[0]:
                g_x0 += g_dx0 * dx0 * F.less_scalar(x0, 0.0)
            else:
                g_x0.copy_from(g_dx0 * dx0 * F.less_scalar(x0, 0.0))
        if prop_down[1]:
            inp = nn.Variable(x0.shape).apply(data=x0,
                                              grad=g_dy,
                                              need_grad=True)
            out = nn.Variable(dy.shape).apply(grad=g_dx0)
            self.forward_func.backward([inp], [out], accum=[accum[1]])
Code example #2
def huber_loss_backward(inputs, delta=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.less_scalar(F.abs(d), delta)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = 2 * d * m0
    t1 = 2 * delta * m1 * mg
    t2 = -2 * delta * m1 * ml
    dx0 = dy * (t0 + t1 + t2)
    dx1 = -dx0
    return dx0, dx1
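The four masks implement the piecewise Huber derivative: 2*d where |d| < delta and ±2*delta outside (nnabla's huber_loss is defined without the conventional 1/2 factor). A minimal sanity check, assuming no_grad and F are imported as in the source module (e.g. import nnabla.functions as F):

import numpy as np
import nnabla as nn

with nn.auto_forward():
    dy = nn.Variable.from_numpy_array(np.ones(2, dtype=np.float32))
    x0 = nn.Variable.from_numpy_array(np.array([0.3, 3.0], dtype=np.float32))
    x1 = nn.Variable.from_numpy_array(np.zeros(2, dtype=np.float32))
    dx0, dx1 = huber_loss_backward([dy, x0, x1], delta=1.0)
print(dx0.d)  # [0.6, 2.0]: 2*d inside |d| < delta, 2*delta*sign(d) outside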
Code example #3
def sample_pdf(bins, weights, N_samples, det=False):
    """Sample additional points for training fine network

    Args:
      bins: int. Height in pixels.
      weights: int. Width in pixels.
      N_samples: float. Focal length of pinhole camera.
      det

    Returns:
      samples: array of shape [batch_size, 3]. Depth samples for fine network
    """
    weights += 1e-5  # avoid division by zero when normalizing
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)

    cdf = F.cumsum(pdf, axis=-1)
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    if det:
        u = F.arange(0., 1., 1 / N_samples)
        u = F.broadcast(u[None, :], cdf.shape[:-1] + (N_samples, ))
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=cdf.shape[:-1] + (N_samples, ))

    indices = F.searchsorted(cdf, u, right=True)
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)
    cdf_g = F.gather(cdf,
                     indices_g,
                     axis=-1,
                     batch_dims=len(indices_g.shape) - 2)
    bins_g = F.gather(bins,
                      indices_g,
                      axis=-1,
                      batch_dims=len(indices_g.shape) - 2)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom = F.where(F.less_scalar(denom, 1e-5), F.constant(1, denom.shape),
                    denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
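A minimal usage sketch (shapes are assumptions inferred from the gathers above: bins carries one more entry along its last axis than weights, matching the zero prepended to cdf; F.searchsorted needs a reasonably recent nnabla):

import numpy as np
import nnabla as nn

with nn.auto_forward():
    # 8 rays, 64 bin edges, 63 per-bin weights -> 64 fine samples per ray
    bins = nn.Variable.from_numpy_array(
        np.tile(np.linspace(2.0, 6.0, 64, dtype=np.float32), (8, 1)))
    weights = nn.Variable.from_numpy_array(
        np.random.rand(8, 63).astype(np.float32))
    samples = sample_pdf(bins, weights, N_samples=64)
print(samples.shape)  # (8, 64)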
Code example #4
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        base_axis = self.forward_func.info.args["base_axis"]
        # Inputs
        x = inputs[0].data
        w = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx = outputs[0].data
        dw = outputs[1].data
        # Grads of inputs
        g_x = inputs[0].grad
        g_w = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx = outputs[0].grad
        g_dw = outputs[1].grad

        # Computation
        shared = w.shape == ()
        shape = [x.shape[i] if i == base_axis else 1 for i in range(x.ndim)]
        axes = [i for i in range(x.ndim)]
        if not shared:
            axes.pop(base_axis)

        def reshape(v, shape):
            if shared:
                shape = [1 for _ in range(len(shape))]
            return F.reshape(v, shape)

        if prop_down[0] or prop_down[1]:
            nmask = F.less_scalar(x, 0.0)
        if prop_down[0]:
            if accum[0]:
                g_x += dy * reshape(g_dw, shape) * nmask
            else:
                g_x.copy_from(dy * reshape(g_dw, shape) * nmask)
        if prop_down[1]:
            if accum[1]:
                g_w += F.sum(dy * g_dx * nmask, axes)
            else:
                g_w.copy_from(F.sum(dy * g_dx * nmask, axes))
        if prop_down[2]:
            pmask = 1.0 - nmask
            g_dx_ = g_dx * (pmask + reshape(w, shape) * nmask) + \
                reshape(g_dw, shape) * x * nmask
            if accum[2]:
                g_dy += g_dx_
            else:
                g_dy.copy_from(g_dx_)
Code example #5
def lab2rgb(input):
    L, a, b = F.split(input, axis=1)
    y = (L + 16.0) / 116.0
    x = (a / 500.0) + y
    z = y - (b / 200.0)
    neg_mask = F.less_scalar(z, 0).apply(need_grad=False)
    z = z * F.logical_not(neg_mask)
    mask_Y = F.greater_scalar(y, 0.2068966).apply(need_grad=False)
    mask_X = F.greater_scalar(x, 0.2068966).apply(need_grad=False)
    mask_Z = F.greater_scalar(z, 0.2068966).apply(need_grad=False)
    Y_1 = (y ** 3) * mask_Y
    Y_2 = L / (116. * 7.787) * F.logical_not(mask_Y)
    var_Y = Y_1 + Y_2

    X_1 = (x ** 3) * mask_X
    X_2 = (x - 16. / 116.) / 7.787 * F.logical_not(mask_X)
    var_X = X_1 + X_2

    Z_1 = (z ** 3) * mask_Z
    Z_2 = (z - 16. / 116.) / 7.787 * F.logical_not(mask_Z)
    var_Z = Z_1 + Z_2

    X = 0.95047 * var_X
    Y = 1.00000 * var_Y
    Z = 1.08883 * var_Z

    var_R = X * 3.2406 + Y * -1.5372 + Z * -0.4986
    var_G = X * -0.9689 + Y * 1.8758 + Z * 0.0415
    var_B = X * 0.0557 + Y * -0.2040 + Z * 1.0570

    mask_R = F.greater_scalar(var_R, 0.0031308).apply(need_grad=False)
    n_mask_R = F.logical_not(mask_R)
    R_1 = (1.055 * (F.maximum2(var_R, n_mask_R) ** (1 / 2.4)) - 0.055) * mask_R
    R_2 = (12.92 * var_R) * n_mask_R
    var_R = R_1 + R_2

    mask_G = F.greater_scalar(var_G, 0.0031308).apply(need_grad=False)
    n_mask_G = F.logical_not(mask_G)
    G_1 = (1.055 * (F.maximum2(var_G, n_mask_G) ** (1 / 2.4)) - 0.055) * mask_G
    G_2 = (12.92 * var_G) * n_mask_G
    var_G = G_1 + G_2

    mask_B = F.greater_scalar(var_B, 0.0031308).apply(need_grad=False)
    n_mask_B = F.logical_not(mask_B)
    B_1 = (1.055 * (F.maximum2(var_B, n_mask_B) ** (1 / 2.4)) - 0.055) * mask_B
    B_2 = (12.92 * var_B) * n_mask_B
    var_B = B_1 + B_2
    return F.stack(var_R, var_G, var_B, axis=1)
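A minimal usage sketch (an assumption: input is a batch of CIELAB images laid out as (batch, 3, H, W) with L, a, b along axis 1). Note that the F.maximum2(..., n_mask_X) calls keep the base of the ** (1 / 2.4) power positive wherever the linear branch is selected, which prevents NaNs from negative bases:

import numpy as np
import nnabla as nn

with nn.auto_forward():
    lab = nn.Variable.from_numpy_array(
        np.random.rand(2, 3, 8, 8).astype(np.float32))
    rgb = lab2rgb(lab)  # (2, 3, 8, 8), channels R, G, B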
Code example #6
def minimum_scalar_backward(inputs, val=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.less_scalar(x0, val)
    m0 = no_grad(m0)
    dx = dy * m0
    return dx
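The gradient of min(x0, val) passes through only where x0 < val; elsewhere the output is the constant val and the gradient is zero. A quick check, under the same import assumptions as the snippets above:

import numpy as np
import nnabla as nn

with nn.auto_forward():
    dy = nn.Variable.from_numpy_array(np.ones(2, dtype=np.float32))
    x0 = nn.Variable.from_numpy_array(np.array([0.5, 2.0], dtype=np.float32))
    dx = minimum_scalar_backward([dy, x0], val=1.0)
print(dx.d)  # [1., 0.]: x0 = 2.0 is clipped to val, so no gradient flows there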
Code example #7
def binary_sigmoid_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.less_scalar(F.abs(x0), 1.0)
    m0 = no_grad(m0)
    dx0 = dy * m0 * 0.5
    return dx0
Code example #8
File: huber_loss.py Project: zeta1999/nnabla
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        delta = self.forward_func.info.args["delta"]
        # Inputs
        x0 = inputs[0].data
        x1 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dx1 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_x1 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dx1 = outputs[1].grad

        # Computation
        if prop_down[0] or prop_down[1] or prop_down[2]:
            mask = F.less_scalar(F.abs(x0 - x1), delta)

        if prop_down[0]:
            if accum[0]:
                g_x0 += mask * 2 * dy * (g_dx0 - g_dx1)
            else:
                g_x0.copy_from(mask * 2 * dy * (g_dx0 - g_dx1))
        if prop_down[1]:
            if accum[1]:
                g_x1 += mask * 2 * dy * (g_dx1 - g_dx0)
            else:
                g_x1.copy_from(mask * 2 * dy * (g_dx1 - g_dx0))
        if prop_down[2]:
            # Dividing by dy ("/ dy") would be numerically unstable,
            # so reconstruct the derivative directly instead.
            diff = x0 - x1
            pmask = F.greater_scalar(diff, 0.0)
            nmask = (1.0 - pmask)
            omask = (1.0 - mask)
            g_dx_diff = g_dx0 - g_dx1
            g_dy_ = 2.0 * g_dx_diff * \
                (diff * mask + delta * omask * (pmask - nmask))
            if accum[2]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
Code example #9
File: losses.py Project: sony/nnabla-examples
def gaussian_log_likelihood(x, mean, logstd, orig_max_val=255):
    """
    Compute the log-likelihood of a Gaussian distribution for given data `x`.

    Args:
        x (nn.Variable): Target data. It is assumed that the values are ranged [-1, 1],
                         which are originally [0, orig_max_val].
        mean (nn.Variable): Gaussian mean. Must be the same shape as x.
        logstd (nn.Variable): Gaussian log standard deviation. Must be the same shape as x.
        orig_max_val (int): The maximum value that x originally has before being rescaled.

    Return:
        Log probabilities of x in nats.
    """
    assert x.shape == mean.shape == logstd.shape
    centered_x = x - mean
    inv_std = F.exp(-logstd)
    half_bin = 1.0 / orig_max_val

    def clamp(val):
        # Clip from below to avoid log(0); clip_by_value also needs a max,
        # so use a loose upper bound.
        return F.clip_by_value(val, min=1e-12, max=1e8)

    # x + 0.5 (in original scale)
    plus_in = inv_std * (centered_x + half_bin)
    cdf_plus = approx_standard_normal_cdf(plus_in)
    log_cdf_plus = F.log(clamp(cdf_plus))

    # x - 0.5 (in original scale)
    minus_in = inv_std * (centered_x - half_bin)
    cdf_minus = approx_standard_normal_cdf(minus_in)
    log_one_minus_cdf_minus = F.log(clamp(1.0 - cdf_minus))

    log_cdf_delta = F.log(clamp(cdf_plus - cdf_minus))

    log_probs = F.where(
        F.less_scalar(x, -0.999),
        log_cdf_plus,  # Edge case for 0: treat cdf_minus as CDF(-inf) = 0.
        F.where(F.greater_scalar(x, 0.999),
                # Edge case for orig_max_val: treat cdf_plus as CDF(+inf) = 1.
                log_one_minus_cdf_minus,
                log_cdf_delta  # otherwise
                )
    )

    assert log_probs.shape == x.shape
    return log_probs
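The snippet assumes a helper approx_standard_normal_cdf that is not shown here. A common choice (a tanh-based approximation of the standard normal CDF, used by reference diffusion-model implementations) would look like this sketch:

import numpy as np
import nnabla.functions as F

def approx_standard_normal_cdf(x):
    # Phi(x) ~= 0.5 * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
    return 0.5 * (1.0 + F.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * (x ** 3))))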
Code example #10
def hard_sigmoid_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.greater_scalar(x0, -2.5)
    m1 = F.less_scalar(x0, 2.5)
    m01 = m0 * m1
    m01 = no_grad(m01)
    dx0 = dy * 0.2 * m01
    return dx0
Code example #11
File: minimum_scalar.py Project: yati989/nnabla
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        val = self.forward_func.info.args["val"]
        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Computation
        if prop_down[1]:
            mask = F.less_scalar(x0, val)
            if accum[1]:
                g_dy += g_dx0 * mask
            else:
                g_dy.copy_from(g_dx0 * mask)
Code example #12
File: main.py Project: sony/nnabla-examples
def main():

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data list for sceneflow data set
    train_list = "./dataset/sceneflow_train.csv"
    test_list = "./dataset/sceneflow_test.csv"
    train = True
    validation = True

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img, train=True, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test, disp_img_test, train=False, shuffle=False, dataset=args.dataset)

    # Number of batches per epoch.
    print(train_size, test_size)

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))
    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.less_scalar(var_disp, args.maxdisp)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp)*mask_train)/(sum_mask) + 0.7 * F.sum(F.huber_loss(
        pred2, var_disp)*mask_train)/(sum_mask) + F.sum(F.huber_loss(pred3, var_disp)*mask_train)/(sum_mask))

    # === for Testing ===
    # Definition of pred
    mask_test = F.less_scalar(var_disp_test, args.maxdisp)
    sum_mask_test = F.maximum_scalar(F.sum(mask_test), 1)
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    test_loss = F.sum(F.abs(pred_test - var_disp_test)*mask_test)/sum_mask_test

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set parameters, including those created with need_grad=False.
    params = nn.get_parameters(grad_only=False)
    solver.set_parameters(params)

    for epoch in range(1, args.epochs+1):
        print('This is %d-th epoch' % (epoch))

        if validation:
            ## testing ##
            total_test_loss = 0

            index_test = 0
            while index_test < test_size:
                var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
                test_loss.forward(clear_no_need_grad=True)
                total_test_loss += test_loss.d

                print('Iter %d test loss = %.3f' % (index_test, test_loss.d))
                index_test += 1
            test_error = total_test_loss/test_size
            print('epoch %d average test error in val = %.3f' %
                  (epoch, test_error))
            # Pass validation loss to a monitor.
            monitor_test.add(epoch, test_error)

        if train:
            ## training ##
            total_train_loss = 0
            index = 0

            while index < train_size:

                # Get mini batch
                # Preprocess
                var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
                loss.forward(clear_no_need_grad=True)
                # Initialize gradients
                solver.zero_grad()
                # Backward execution
                loss.backward(clear_buffer=True)
                # Update parameters by computed gradients
                solver.update()
                print('Iter %d training loss = %.3f' %
                      (index, loss.d))
                total_train_loss += loss.d
                index += 1
            train_error = total_train_loss/train_size
            monitor_time_train.add(epoch)
            print('epoch %d total training loss = %.3f' % (epoch, train_error))

            # Pass training loss to a monitor.
            monitor_train.add(epoch, train_error)
            print('full training time = %.2f HR' %
                  ((time.time() - start_full_time)/3600))

            # Save Parameter
            out_param_file = os.path.join(
                args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
            nn.save_parameters(out_param_file)
Code example #13
    def bidirectional_sphere_trace(self, camloc, raydir, t_start, t_finish):
        t_f = F.identity(t_start)
        x_f = camloc + t_f * raydir
        s_f = self.sdf(x_f)
        mask_hit_eps_f = 0 * F.identity(t_f)

        t_b = F.identity(t_finish)
        x_b = camloc + t_b * raydir
        s_b = self.sdf(x_b)
        mask_hit_eps_b = 0 * F.identity(t_b)

        for i in range(self.sphere_trace_itr - 1):
            # Forward direction
            mask_hit_eps_f_i = F.less_equal_scalar(F.abs(s_f), self.eps)
            mask_hit_eps_f += (1 - mask_hit_eps_f) * mask_hit_eps_f_i
            t_f += (1 - mask_hit_eps_f) * s_f
            x_f = camloc + t_f * raydir

            s_f_prev = F.identity(s_f)
            s_f = self.sdf(x_f)
            mask_pos_f_prev = (1 - mask_hit_eps_f) * \
                F.greater_scalar(s_f_prev, 0)
            mask_neg_f = (1 - mask_hit_eps_f) * F.less_scalar(s_f, 0)
            mask_revert_f = mask_pos_f_prev * mask_neg_f
            t_f -= mask_revert_f * s_f_prev
            s_f = mask_revert_f * s_f_prev + (1 - mask_revert_f) * s_f

            # Backward direction
            mask_hit_eps_b_i = F.less_equal_scalar(F.abs(s_b), self.eps)
            mask_hit_eps_b += (1 - mask_hit_eps_b) * mask_hit_eps_b_i
            t_b -= (1 - mask_hit_eps_b) * s_b
            x_b = camloc + t_b * raydir

            s_b_prev = F.identity(s_b)
            s_b = self.sdf(x_b)
            mask_pos_b_prev = (1 - mask_hit_eps_b) * \
                F.greater_scalar(s_b_prev, 0)
            mask_neg_b = (1 - mask_hit_eps_b) * F.less_scalar(s_b, 0)
            mask_revert_b = mask_pos_b_prev * mask_neg_b
            t_b += mask_revert_b * s_b_prev
            s_b = mask_revert_b * s_b_prev + (1 - mask_revert_b) * s_b

            ## print("s_f neg", np.sum(s_f.data < 0))
            ## print("s_b neg", np.sum(s_b.data < 0))

        # Fine grained start/finish points
        t_f0 = t_f
        t_f1 = t_f + mask_revert_f * s_f_prev
        x_hit_st0 = camloc + t_f0 * raydir
        ## x0, x1 = self.post_method(x_hit_st0, camloc + t_f1 * raydir)
        ## t_f0 = F.norm((x0 - camloc), axis=(x0.ndim - 1), keepdims=True)
        ## t_f1 = F.norm((x1 - camloc), axis=(x1.ndim - 1), keepdims=True)
        mask_hit_f1b = mask_revert_f * F.less(t_f1, t_b)
        t_b = t_f1 * mask_hit_f1b + t_b * (1 - mask_hit_f1b)

        # Reverse the opposite case
        mask_fb = F.less(t_f, t_b)
        t_f = t_f * mask_fb + t_start * (1 - mask_fb)
        t_b = t_b * mask_fb + t_finish * (1 - mask_fb)

        return x_hit_st0, t_f, t_b, mask_hit_eps_f
Code example #14
File: degree.py Project: HiroakiMikami/nnabla-ggnn
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-train-examples", type=int, default=1600)
    parser.add_argument("--num-valid-examples", type=int, default=100)
    parser.add_argument("--accum-grad", type=int, default=32)
    parser.add_argument("--max-iter", type=int, default=6400)
    parser.add_argument("--valid-interval", type=int, default=100)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)

    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare dataset
    tdataset = []
    for i in range(args.num_train_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        tdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    vdataset = []
    for i in range(args.num_valid_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        vdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    # prepare data iterator
    tdata = data_iterator(SimpleDataSource2(tdataset, shuffle=True), 1, False,
                          False, False)
    vdata = data_iterator(SimpleDataSource2(vdataset, shuffle=False), 1, False,
                          False, False)

    # prepare monitors
    monitor = M.Monitor("./degree")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)

    verror = M.MonitorSeries("Validation Error", monitor, interval=10)

    # prepare solver
    solver = S.Adam()

    # training loop
    for i in range(args.max_iter):
        l = 0
        for b in range(args.accum_grad):
            # read data
            V, E, degree = tdata.next()
            V = V[0][0]
            E = E[0][0]
            degree = degree[0][0]

            # predict
            output = predict(V, E)

            # initialize solver
            if i == 0 and b == 0:
                solver.set_parameters(nn.get_parameters())

            # calculate loss
            label = nn.Variable.from_numpy_array(degree)
            label = F.reshape(label, (len(V), 1))
            loss = F.mean(F.squared_error(output, label))

            # training
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data

        solver.update()
        solver.zero_grad()  # clear accumulated gradients before the next iteration

        tloss.add(i, l / args.accum_grad)
        l = 0

        if i % args.valid_interval == 0:
            # validation
            # read data
            e = 0
            n = 0
            for b in range(vdata.size):
                V, E, degree = vdata.next()
                V = V[0][0]
                E = E[0][0]
                degree = degree[0][0]

                output = predict(V, E)

                label = nn.Variable.from_numpy_array(degree)
                label = F.reshape(label, (len(V), 1))
                error = F.sum(F.less_scalar(F.abs(F.sub2(output, label)), 0.5))

                error.forward()

                e += error.data.data
                n += len(V)
            verror.add(i, e / n)