Example #1
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        epsilon = self.forward_func.info.args["epsilon"]

        # Inputs
        x0 = inputs[0].data
        x1 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dx1 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_x1 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dx1 = outputs[1].grad

        # Computation
        if prop_down[2]:
            # Simply using " / dy" causes numerical instability
            diff = x0 - x1
            mask = F.greater_scalar(F.abs(diff), epsilon)
            maskp = F.greater_scalar(diff, 0.0)
            maskn = 1.0 - maskp
            g_dy_ = (g_dx0 - g_dx1) * (maskp - maskn) * mask
            if accum[2]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
Example #2
def epsilon_insensitive_loss_backward(inputs, epsilon):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.greater_scalar(F.abs(d), epsilon)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = m0 * mg
    t1 = -m0 * ml
    dx0 = dy * (t0 + t1)
    dx1 = -dx0
    return dx0, dx1
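
The piecewise rule above can be sanity-checked against nnabla's own autodiff of F.epsilon_insensitive_loss. A minimal sketch, assuming small random float32 inputs and an illustrative epsilon of 0.5:

import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(0)
x0 = nn.Variable.from_numpy_array(rng.randn(2, 3).astype(np.float32), need_grad=True)
x1 = nn.Variable.from_numpy_array(rng.randn(2, 3).astype(np.float32))
epsilon = 0.5

y = F.sum(F.epsilon_insensitive_loss(x0, x1, epsilon))
y.forward()
x0.grad.zero()
y.backward()

d = x0.d - x1.d
expected = np.where(np.abs(d) > epsilon, np.sign(d), 0.0)
print(np.allclose(x0.g, expected))  # True when the mask logic above matches autodiff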
Example #3
def norm_normalization_backward(inputs, p=None, axes=None, eps=1e-12):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)
    # x_norm = x_sum ** (1./p)

    # Div2 backward
    dx = dy * x_sum**(-1. / p)
    dx_norm = -dy * x0 * x_sum**(-2. / p)
    dx_norm = sum_for_arithmetics(dx_norm, x_sum)

    # Norm backward
    x_sign = no_grad(F.sign(x0))
    dx += dx_norm * x_sum**(1. / p - 1.) * x_abs**(p - 1.) * x_sign

    return dx
Example #4
def norm_backward(inputs, p=None, axes=None, keep_dims=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]

    if p is None:
        p = 2.0
    axes = list(range(x0.ndim)) if axes is None else force_list(axes)

    x_abs = F.abs(x0)
    x_pow = F.pow_scalar(x_abs, p)
    x_sum = F.sum(x_pow, axes, keepdims=True)

    # Add axis for mul2
    if not keep_dims:
        shape = list(x0.shape)
        for a in axes:
            shape[a] = 1
        dy = dy.reshape(shape)

    x_sign = no_grad(F.sign(x0))
    dx = dy * x_sum**(1. / p - 1.) * x_abs**(p - 1.) * x_sign

    return dx
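
For reference, the chain of abs/pow/sum above encodes the elementwise gradient of the p-norm,

    \frac{\partial \lVert x \rVert_p}{\partial x_i} = \operatorname{sign}(x_i)\,|x_i|^{p-1}\Big(\sum_j |x_j|^p\Big)^{\frac{1}{p}-1},

which is then multiplied by the incoming gradient dy, reshaped so it broadcasts back over the reduced axes when keep_dims is False.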
Example #5
def huber_loss_backward(inputs, delta=1.0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1
    m0 = F.less_scalar(F.abs(d), delta)
    m1 = 1 - m0
    mg = F.greater(x0, x1)
    ml = 1 - mg
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    mg = no_grad(mg)
    ml = no_grad(ml)
    t0 = 2 * d * m0
    t1 = 2 * delta * m1 * mg
    t2 = -2 * delta * m1 * ml
    dx0 = dy * (t0 + t1 + t2)
    dx1 = -dx0
    return dx0, dx1
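
As a quick check, the piecewise rule can be compared with nnabla's autodiff of F.huber_loss (the 2 * d and 2 * delta terms above correspond to a Huber loss defined without the usual 1/2 factor). A minimal sketch with assumed random float32 inputs:

import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(1)
x0 = nn.Variable.from_numpy_array(rng.randn(2, 3).astype(np.float32), need_grad=True)
x1 = nn.Variable.from_numpy_array(rng.randn(2, 3).astype(np.float32))
delta = 1.0

y = F.sum(F.huber_loss(x0, x1, delta=delta))
y.forward()
x0.grad.zero()
y.backward()

d = x0.d - x1.d
expected = np.where(np.abs(d) < delta, 2.0 * d, 2.0 * delta * np.sign(d))
print(np.allclose(x0.g, expected))  # True when the piecewise cases agree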
Example #6
    def warp_coordinates(self, coordinates):
        theta = self.theta
        theta = F.reshape(
            theta, theta.shape[:1] + (1,) + theta.shape[1:], inplace=False)
        if coordinates.shape[0] == self.bs:
            transformed = F.batch_matmul(
                            F.tile(theta[:, :, :, :2],
                                   (1, coordinates.shape[1], 1, 1)),
                            F.reshape(coordinates, coordinates.shape + (1,), inplace=False)) + theta[:, :, :, 2:]
        else:
            transformed = F.batch_matmul(
                            F.tile(theta[:, :, :, :2],
                                   (1, coordinates.shape[1], 1, 1)),
                            F.tile(F.reshape(coordinates, coordinates.shape + (1,), inplace=False),
                                   (self.bs // coordinates.shape[0], 1, 1, 1))) + theta[:, :, :, 2:]
        transformed = F.reshape(
            transformed, transformed.shape[:-1], inplace=False)

        if self.tps:
            control_points = self.control_points
            control_params = self.control_params
            distances = F.reshape(
                coordinates, (coordinates.shape[0], -1, 1, 2), inplace=False) - F.reshape(control_points, (1, 1, -1, 2))
            distances = F.sum(F.abs(distances), axis=distances.ndim - 1)

            result = distances ** 2
            result = result * F.log(distances + 1e-6)
            result = result * control_params
            result = F.sum(result, axis=2)
            result = F.reshape(
                result, (self.bs, coordinates.shape[1], 1), inplace=False)
            transformed = transformed + result

        return transformed
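
For reference, the distances ** 2 * F.log(distances + 1e-6) product above evaluates the thin-plate-spline radial basis U(r) = r^2 log(r) at (an L1 approximation of) the distance to each control point; the 1e-6 offset only guards the logarithm at r = 0, and control_params weights each basis function before the sum over control points.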
Example #7
def sample_noise(inpt_size, out_size):
    _f = lambda x: F.sign(x) * F.pow_scalar(F.abs(x), 0.5)
    noise = _f(F.randn(shape=(inpt_size + out_size, )))
    eps_w = F.batch_matmul(F.reshape(noise[:inpt_size], (1, -1)),
                           F.reshape(noise[inpt_size:], (1, -1)), True)
    eps_b = noise[inpt_size:]
    return eps_w, eps_b
Example #8
	def forward(self, x):
		N, C, H, W = x.shape

		log_abs = F.log(F.abs(self.scale))
		logdet = H*W*F.sum(log_abs)

		if self.logdet:
			return self.scale * (x + self.loc), logdet
		else:
			return self.scale * (x + self.loc)
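
The logdet term follows from the Jacobian of a per-channel affine map y = scale * (x + loc) applied independently at every spatial position:

    \log\left|\det\frac{\partial y}{\partial x}\right| = H \cdot W \sum_c \log|s_c|,

which is exactly H*W*F.sum(log_abs) as computed above.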
Example #9
    def forward_impl(self, inputs, outputs):
        x = inputs[0].data
        M = inputs[1].data
        y = outputs[0].data
        y.copy_from(x)

        if not self.training:
            return
        Mb = F.max(F.abs(x), keepdims=True)
        F.maximum2(M, Mb, outputs=[M])
Example #10
def softsign_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    dx0 = dy * (1 / (1 + F.abs(x0))**2)
    return dx0
Example #11
def binary_tanh_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.less_scalar(F.abs(x0), 1.0)
    m0 = no_grad(m0)
    dx0 = dy * m0
    return dx0
Example #12
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        delta = self.forward_func.info.args["delta"]
        # Inputs
        x0 = inputs[0].data
        x1 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dx1 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_x1 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dx1 = outputs[1].grad

        # Computation
        if prop_down[0] or prop_down[1] or prop_down[2]:
            mask = F.less_scalar(F.abs(x0 - x1), delta)

        if prop_down[0]:
            if accum[0]:
                g_x0 += mask * 2 * dy * (g_dx0 - g_dx1)
            else:
                g_x0.copy_from(mask * 2 * dy * (g_dx0 - g_dx1))
        if prop_down[1]:
            if accum[1]:
                g_x1 += mask * 2 * dy * (g_dx1 - g_dx0)
            else:
                g_x1.copy_from(mask * 2 * dy * (g_dx1 - g_dx0))
        if prop_down[2]:
            # Simply using " / dy" causes numerical instability
            diff = x0 - x1
            pmask = F.greater_scalar(diff, 0.0)
            nmask = (1.0 - pmask)
            omask = (1.0 - mask)
            g_dx_diff = g_dx0 - g_dx1
            g_dy_ = 2.0 * g_dx_diff * \
                (diff * mask + delta * omask * (pmask - nmask))
            if accum[2]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
Example #13
def secant(x0, x1, implicit_function, max_post_itr, eps=1e-16):
    f0 = implicit_function(x0)  # > 0
    f1 = implicit_function(x1)  # < 0

    for i in range(max_post_itr):
        nu = f0 * (x1 - x0)
        de = f1 - f0
        mask0 = F.greater_scalar(F.abs(de), eps)
        mask1 = 1 - mask0
        nu = mask0 * nu + mask1 * 0
        de = mask0 * de + mask1 * 1

        xm = x0 - nu / de
        fm = implicit_function(xm)

        mp = F.greater_equal_scalar(fm, 0)
        mn = 1 - mp
        x0 = mp * xm + mn * x0
        f0 = mp * fm + mn * f0
        x1 = mn * xm + mp * x1
        f1 = mn * fm + mp * f1
    return x0, x1
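
A minimal sketch of driving secant() with a toy implicit function (f(x) = 1 - x, whose root is x = 1; shapes, values and the bracketing interval are illustrative assumptions):

import numpy as np
import nnabla as nn
import nnabla.functions as F

x0 = nn.Variable.from_numpy_array(np.full((4, 1), 0.5, dtype=np.float32))  # f(x0) > 0
x1 = nn.Variable.from_numpy_array(np.full((4, 1), 2.0, dtype=np.float32))  # f(x1) < 0
xl, xr = secant(x0, x1, lambda x: 1.0 - x, max_post_itr=4)
F.sink(xl, xr).forward()
print(xl.d.ravel())  # converges to the root at 1.0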
Example #14
def invertible_conv(x, reverse, rng, scope):
    r"""Invertible 1x1 Convolution Layer.

    Args:
        x (nn.Variable): Input variable.
        reverse (bool): Whether it's a reverse direction.
        rng (numpy.random.RandomState): A random generator.
        scope (str): The scope.

    Returns:
        nn.Variable: The output variable.
    """
    batch_size, c, n_groups = x.shape
    with nn.parameter_scope(scope):
        # initialize w by an orthonormal matrix
        w_init = np.linalg.qr(rng.randn(c, c))[0][None, ...]
        W_var = get_parameter_or_create("W", (1, c, c), w_init, True, True)
        W = F.batch_inv(W_var) if reverse else W_var
        x = F.convolution(x, F.reshape(W, (c, c, 1)), None, stride=(1, ))
    if reverse:
        return x
    log_det = batch_size * n_groups * F.log(F.abs(F.batch_det(W)))
    return x, log_det
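
A minimal sketch of the forward/reverse round trip (assumed shapes, seed and scope name); because both calls share the same parameter scope, the reverse call reuses and inverts the same W:

import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(313)
x = nn.Variable.from_numpy_array(rng.randn(2, 8, 16).astype(np.float32))
z, log_det = invertible_conv(x, reverse=False, rng=rng, scope="invconv0")
x_rec = invertible_conv(z, reverse=True, rng=rng, scope="invconv0")
F.sink(x_rec, log_det).forward()
print(np.allclose(x.d, x_rec.d, atol=1e-4))  # the 1x1 convolution round-trips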
Example #15
def get_d_layer(real_layers, fake_layers):
    """
    discriminator layer loss
    """
    fix_range = 0.02  # hard coded, all layers are roughly scaled to this value
    sum_layer_loss = 0  # adds-on for generator
    layer_loss_list = []
    layer_n = len(real_layers)

    # hard coded, an overall average of all layers
    layer_norm = [12.0, 14.0, 24.0, 100.0]

    for layer_i in range(layer_n):
        real_layer = real_layers[layer_i]
        false_layer = fake_layers[layer_i]

        layer_diff = real_layer - false_layer
        layer_loss = F.mean(F.sum(F.abs(layer_diff), axis=[3]))  # an l1 loss
        layer_loss_list += [layer_loss]
        scaled_layer_loss = fix_range * \
            layer_loss / layer_norm[layer_i]
        sum_layer_loss += scaled_layer_loss

    return sum_layer_loss
Example #16
def train(generator, discriminator, patch_gan, solver_gen, solver_dis,
          weight_l1, train_iterator, val_iterator, epoch, monitor, interval):
    # Create Network Graph
    # for training
    im, la = train_iterator.next()  # for checking image shape
    real = nn.Variable(im.shape)  # real
    x = nn.Variable(la.shape)  # x
    # for validation
    real_val = nn.Variable(im.shape)  # real
    x_val = nn.Variable(la.shape)  # x

    # Generator
    fake = generator(x, test=False)
    # pix2pix infers just like training mode.
    fake_val = generator(x_val, test=False)
    fake_val.persistent = True  # Keep to visualize
    # Discriminator
    fake_y = discriminator(x, fake, patch_gan=patch_gan, test=False)
    real_y = discriminator(x, real, patch_gan=patch_gan, test=False)
    real_target = nn.Variable(fake_y.shape)
    real_target.data.fill(1)
    fake_target = nn.Variable(real_y.shape)
    fake_target.data.zero()

    loss_gen = F.mean(weight_l1 * F.abs(real - fake)) + \
        F.mean(F.sigmoid_cross_entropy(fake_y, real_target))
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(real_y, real_target) +
        F.sigmoid_cross_entropy(fake_y, fake_target))

    # Setting Solvers
    with nn.parameter_scope('generator'):
        solver_gen.set_parameters(nn.get_parameters())

    with nn.parameter_scope('discriminator'):
        solver_dis.set_parameters(nn.get_parameters())

    # Create Monitors
    monitors = {
        'loss_gen':
        nm.MonitorSeries("Generator loss", monitor, interval=interval),
        'loss_dis':
        nm.MonitorSeries("Discriminator loss", monitor, interval=interval),
        'time':
        nm.MonitorTimeElapsed("Training time", monitor, interval=interval),
        'fake':
        nm.MonitorImageTile(
            "Fake images",
            monitor,
            interval=interval,
            num_images=2,
            normalize_method=lambda x: np.clip(np.divide(x, 255.0), 0.0, 1.0)),
    }

    i = 0
    for e in range(epoch):
        logger.info('Epoch = {}'.format(e))
        # Training
        while e == train_iterator.epoch:
            # forward / backward process
            real.d, x.d = train_iterator.next()
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            # Discriminator
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()
            # Generator
            loss_gen.forward(clear_no_need_grad=True)
            loss_gen.backward(clear_buffer=True)
            solver_gen.update()
            monitors['time'].add(i)
            monitors['loss_gen'].add(i, loss_gen.d.copy())
            monitors['loss_dis'].add(i, loss_dis.d.copy())
            # Validation
            real_val.d, x_val.d = val_iterator.next()
            fake_val.forward()
            pix2pix_vis = np.stack(
                [label_to_image(x_val.d),
                 normalize_image(fake_val.d)],
                axis=1).reshape((-1, ) + fake.shape[1:])
            monitors['fake'].add(i, pix2pix_vis)
            i += 1
    # save parameters of generator
    save_path = os.path.join(monitor._save_path,
                             'generator_model_{}.h5'.format(i))
    with nn.parameter_scope('generator'):
        nn.save_parameters(save_path)

    return save_path
Example #17
def main():

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data list for sceneflow data set
    train_list = "./dataset/sceneflow_train.csv"
    test_list = "./dataset/sceneflow_test.csv"
    train = True
    validation = True

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterator.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img, train=True, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test, disp_img_test, train=False, shuffle=False, dataset=args.dataset)

    # Set data size

    print(train_size, test_size)

    # Define data shape for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))
    # Define data shape for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))
    mask_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.less_scalar(var_disp, args.maxdisp)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp)*mask_train)/(sum_mask) + 0.7 * F.sum(F.huber_loss(
        pred2, var_disp)*mask_train)/(sum_mask) + F.sum(F.huber_loss(pred3, var_disp)*mask_train)/(sum_mask))

    # === for Testing ===
    # Definition of pred
    mask_test = F.less_scalar(var_disp_test, args.maxdisp)
    sum_mask_test = F.maximum_scalar(F.sum(mask_test), 1)
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    test_loss = F.sum(F.abs(pred_test - var_disp_test)*mask_test)/sum_mask_test

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set Parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs+1):
        print('This is %d-th epoch' % (epoch))

        if validation:
            ## testing ##
            total_test_loss = 0

            index_test = 0
            while index_test < test_size:
                var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
                test_loss.forward(clear_no_need_grad=True)
                total_test_loss += test_loss.d

                print('Iter %d test loss = %.3f' % (index_test, test_loss.d))
                index_test += 1
            test_error = total_test_loss/test_size
            print('epoch %d total 3-px error in val = %.3f' %
                  (epoch, test_error))
            # Pass validation loss to a monitor.
            monitor_test.add(epoch, test_error)

        if train:
            ## training ##
            total_train_loss = 0
            index = 0

            while index < train_size:

                # Get mini batch
                # Preprocess
                var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
                loss.forward(clear_no_need_grad=True)
                # Initialize gradients
                solver.zero_grad()
                # Backward execution
                loss.backward(clear_buffer=True)
                # Update parameters by computed gradients
                solver.update()
                print('Iter %d training loss = %.3f' %
                      (index, loss.d))
                total_train_loss += loss.d
                index += 1
            train_error = total_train_loss/train_size
            monitor_time_train.add(epoch)
            print('epoch %d total training loss = %.3f' % (epoch, train_error))

            # Pass training loss to a monitor.
            monitor_train.add(epoch, train_error)
            print('full training time = %.2f HR' %
                  ((time.time() - start_full_time)/3600))

            # Save Parameter
            out_param_file = os.path.join(
                args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
            nn.save_parameters(out_param_file)
Example #18
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.abs(h - one))
    return r
Example #19
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.abs(pred0 - pred1))
    return loss_sr
Example #20
def reconstruction_loss(imgA, imgB):
    return F.mean(F.abs(imgA - imgB))
Example #21
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.abs(h - one))
    return r
Example #22
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.abs(pred0 - pred1))
    return loss_sr
Example #23
def parametric_pow2_quantize(x,
                             sign=True,
                             with_zero=True,
                             n_init=8,
                             n_min=1,
                             n_max=16,
                             m_init=1,
                             m_min=-8,
                             m_max=8,
                             fix_parameters=False):
    """Parametric version of `pow2_quantize` where the
    bitwidth `n` and dynamic range `m` are learnable parameters.

    Args:
        x(~nnabla.Variable): N-D array as input
        sign (bool): keep sign information during quantization.
        with_zero (bool): quantize small weights to zero.
        n_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for bitwidth parameter.
        n_min (int): lower bound for bitwidth.
        n_max (int): upper bound for bitwidth.
        m_init (:obj:`nnabla.initializer.BaseInitializer` or :obj:`numpy.ndarray`): Initializer for dynamic range.
        m_min (float): lower bound for dynamic range.
        m_max (float): upper bound for dynamic range.
        fix_parameters (bool): When set to `True`, the parameters `n` and `m`
            will not be updated.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1, ) * len(shape), inplace=False),
                           shape=shape)

    def quantize_pow2(v):
        return 2**F.round(F.log(F.abs(v)) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    m = get_parameter_or_create("m", (),
                                ConstantInitializer(m_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)

    # ensure that bitwidth is in specified range and an integer
    n_q = F.round(clip_scalar(n, n_min, n_max))
    if sign:
        n_q = n_q - 1
    if with_zero:
        n_q = n_q - 1

    # ensure that dynamic range is in specified range and an integer
    m_q = F.round(clip_scalar(m, m_min, m_max))

    # compute min/max value that we can represent
    x_max = 2**m_q
    x_min = 2**(m_q - (2**n_q) + 1)

    # broadcast variables to correct size
    x_min = broadcast_scalar(x_min, shape=x.shape)
    x_max = broadcast_scalar(x_max, shape=x.shape)

    # if unsigned, then quantize all negative values to zero
    if not sign:
        x = F.relu(x)

    # compute absolute value/sign of input
    ax = F.abs(x)
    sx = F.sign(x)

    if with_zero:
        # prune smallest elements (in magnitude) to zero if they are smaller
        # than `x_min / \sqrt(2)`
        x_threshold = x_min / np.sqrt(2)

        idx1 = F.greater_equal(ax, x_threshold) * F.less(ax, x_min)
        idx2 = F.greater_equal(ax, x_min) * F.less(ax, x_max)
        idx3 = F.greater_equal(ax, x_max)
    else:
        idx1 = F.less(ax, x_min)
        idx2 = F.greater_equal(ax, x_min) * F.less(ax, x_max)
        idx3 = F.greater_equal(ax, x_max)

    # do not backpropagate gradient through indices
    idx1.need_grad = False
    idx2.need_grad = False
    idx3.need_grad = False

    # do not backpropagate gradient through sign
    sx.need_grad = False

    # take care of values outside of dynamic range
    return sx * (x_min * idx1 + quantize_pow2(ax) * idx2 + x_max * idx3)
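
A minimal sketch of wiring the quantizer into a graph (assumed input shape and scope name); the learnable n and m are created inside the given parameter scope and are picked up by nn.get_parameters() like any other parameter:

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(
    np.random.RandomState(0).randn(4, 16).astype(np.float32))
with nn.parameter_scope("pow2_quantize_0"):
    x_q = parametric_pow2_quantize(x, sign=True, with_zero=True)
x_q.forward()
print(np.unique(np.abs(x_q.d))[:8])  # magnitudes are zero or powers of two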
Example #24
def parametric_pow2_quantize_xmin_xmax(x,
                                       sign=True,
                                       with_zero=True,
                                       xmin_init=2**-7,
                                       xmin_min=2**-15,
                                       xmin_max=256,
                                       xmax_init=2**0,
                                       xmax_min=2**-8,
                                       xmax_max=256,
                                       fix_parameters=False):
    """Parametric version of `pow2_quantize` where the
    min value `xmin` and max value `xmax` are learnable parameters.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1, ) * len(shape), inplace=False),
                           shape=shape)

    def quantize_pow2(v):
        return 2.**F.round(F.log(F.abs(v)) / np.log(2.))

    xmin = get_parameter_or_create("xmin", (),
                                   ConstantInitializer(xmin_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # ensure that minimum dynamic range is in specified range and a power-of-two
    xmin = quantize_pow2(clip_scalar(xmin, xmin_min, xmin_max))

    # ensure that maximum dynamic range is in specified range and a power-of-two
    xmax = quantize_pow2(clip_scalar(xmax, xmax_min, xmax_max))

    # broadcast variables to correct size
    xmin = broadcast_scalar(xmin, shape=x.shape)
    xmax = broadcast_scalar(xmax, shape=x.shape)

    # if unsigned, then quantize all negative values to zero
    if not sign:
        x = F.relu(x)

    # compute absolute value/sign of input
    ax = F.abs(x)
    sx = F.sign(x)

    if with_zero:
        # prune smallest elements (in magnitude) to zero if they are smaller
        # than `x_min / \sqrt(2)`
        x_threshold = xmin / np.sqrt(2)

        idx1 = F.greater_equal(ax, x_threshold) * F.less(ax, xmin)
        idx2 = F.greater_equal(ax, xmin) * F.less(ax, xmax)
        idx3 = F.greater_equal(ax, xmax)
    else:
        idx1 = F.less(ax, xmin)
        idx2 = F.greater_equal(ax, xmin) * F.less(ax, xmax)
        idx3 = F.greater_equal(ax, xmax)

    # do not backpropagate gradient through indices
    idx1.need_grad = False
    idx2.need_grad = False
    idx3.need_grad = False

    # do not backpropagate gradient through sign
    sx.need_grad = False

    # take care of values outside of dynamic range
    return sx * (xmin * idx1 + quantize_pow2(ax) * idx2 + xmax * idx3)
Example #25
def quantize_pow2(v):
    return 2.**F.round(F.log(F.abs(v)) / np.log(2.))
Example #26
    def bidirectional_sphere_trace(self, camloc, raydir, t_start, t_finish):
        t_f = F.identity(t_start)
        x_f = camloc + t_f * raydir
        s_f = self.sdf(x_f)
        mask_hit_eps_f = 0 * F.identity(t_f)

        t_b = F.identity(t_finish)
        x_b = camloc + t_b * raydir
        s_b = self.sdf(x_b)
        mask_hit_eps_b = 0 * F.identity(t_b)

        for i in range(self.sphere_trace_itr - 1):
            # Forward direction
            mask_hit_eps_f_i = F.less_equal_scalar(F.abs(s_f), self.eps)
            mask_hit_eps_f += (1 - mask_hit_eps_f) * mask_hit_eps_f_i
            t_f += (1 - mask_hit_eps_f) * s_f
            x_f = camloc + t_f * raydir

            s_f_prev = F.identity(s_f)
            s_f = self.sdf(x_f)
            mask_pos_f_prev = (1 - mask_hit_eps_f) * \
                F.greater_scalar(s_f_prev, 0)
            mask_neg_f = (1 - mask_hit_eps_f) * F.less_scalar(s_f, 0)
            mask_revert_f = mask_pos_f_prev * mask_neg_f
            t_f -= mask_revert_f * s_f_prev
            s_f = mask_revert_f * s_f_prev + (1 - mask_revert_f) * s_f

            # Backward direction
            mask_hit_eps_b_i = F.less_equal_scalar(F.abs(s_b), self.eps)
            mask_hit_eps_b += (1 - mask_hit_eps_b) * mask_hit_eps_b_i
            t_b -= (1 - mask_hit_eps_b) * s_b
            x_b = camloc + t_b * raydir

            s_b_prev = F.identity(s_b)
            s_b = self.sdf(x_b)
            mask_pos_b_prev = (1 - mask_hit_eps_b) * \
                F.greater_scalar(s_b_prev, 0)
            mask_neg_b = (1 - mask_hit_eps_b) * F.less_scalar(s_b, 0)
            mask_revert_b = mask_pos_b_prev * mask_neg_b
            t_b += mask_revert_b * s_b_prev
            s_b = mask_revert_b * s_b_prev + (1 - mask_revert_b) * s_b

            ## print("s_f neg", np.sum(s_f.data < 0))
            ## print("s_b neg", np.sum(s_b.data < 0))

        # Fine grained start/finish points
        t_f0 = t_f
        t_f1 = t_f + mask_revert_f * s_f_prev
        x_hit_st0 = camloc + t_f0 * raydir
        ## x0, x1 = self.post_method(x_hit_st0, camloc + t_f1 * raydir)
        ## t_f0 = F.norm((x0 - camloc), axis=(x0.ndim - 1), keepdims=True)
        ## t_f1 = F.norm((x1 - camloc), axis=(x1.ndim - 1), keepdims=True)
        mask_hit_f1b = mask_revert_f * F.less(t_f1, t_b)
        t_b = t_f1 * mask_hit_f1b + t_b * (1 - mask_hit_f1b)

        # Reverse the opposite case
        mask_fb = F.less(t_f, t_b)
        t_f = t_f * mask_fb + t_start * (1 - mask_fb)
        t_b = t_b * mask_fb + t_finish * (1 - mask_fb)

        return x_hit_st0, t_f, t_b, mask_hit_eps_f
Example #27
def get_tecogan_model(conf, r_inputs, r_targets, scope_name, tecogan=True):
    """
    Create computation graph and variables for TecoGAN.
    """
    # r_inputs, r_targets : shape (batch, conf.train.rnn_n, h, w, c)
    rnn_length = conf.train.rnn_n
    if tecogan:
        r_inputs, r_targets = get_tecogan_inputs(r_inputs, r_targets)
        rnn_length = rnn_length * 2 - 1

    # get the consecutive frame sequences from the input sequence
    frame_t_pre, frame_t = r_inputs[:, 0:-1, :, :, :], r_inputs[:, 1:, :, :, :]

    # Get flow estimations
    fnet_output = get_fnet_output(conf, rnn_length, frame_t_pre, frame_t,
                                  scope_name)

    # Get the generated HR output frames
    gen_outputs = get_generator_output(conf, rnn_length, r_inputs,
                                       fnet_output.flow_hr, scope_name)

    s_gen_output = F.reshape(
        gen_outputs, (conf.train.batch_size * rnn_length,
                      conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)
    s_targets = F.reshape(
        r_targets, (conf.train.batch_size * rnn_length,
                    conf.train.crop_size * 4, conf.train.crop_size * 4, 3),
        inplace=False)

    # Content loss (l2 loss)
    content_loss = F.mean(
        F.sum(F.squared_error(s_gen_output, s_targets), axis=[3]))
    # Warp loss (l2 loss)
    warp_loss = get_warp_loss(conf, rnn_length, frame_t, frame_t_pre,
                              fnet_output.flow_lr)

    if tecogan:
        d_data = get_d_data(conf, fnet_output.flow_hr, gen_outputs, r_targets,
                            rnn_length)
        # Build the tempo discriminator for the real part and fake part
        t_d = get_t_d(conf, r_inputs, d_data)

        # Discriminator layer loss:
        d_layer_loss = get_d_layer(t_d.real_layers, t_d.fake_layers)
        # vgg loss (cosine similarity)
        loss_vgg = get_vgg_loss(s_gen_output, s_targets)
        # ping pong loss (an l1 loss)
        gen_out_first = gen_outputs[:, 0:conf.train.rnn_n - 1, :, :, :]
        gen_out_last_rev = gen_outputs[:, -1:-conf.train.rnn_n:-1, :, :, :]
        pp_loss = F.mean(F.abs(gen_out_first - gen_out_last_rev))
        # adversarial loss
        t_adversarial_loss = F.mean(-F.log(t_d.tdiscrim_fake_output +
                                           conf.train.eps))

        # Overall generator loss
        gen_loss = content_loss + pp_loss * conf.gan.pp_scaling + conf.gan.ratio * \
            t_adversarial_loss + conf.gan.vgg_scaling * loss_vgg + \
            conf.gan.dt_ratio_0 * d_layer_loss

        # Discriminator loss
        t_discrim_fake_loss = F.log(1 - t_d.tdiscrim_fake_output +
                                    conf.train.eps)
        t_discrim_real_loss = F.log(t_d.tdiscrim_real_output + conf.train.eps)
        t_discrim_loss = F.mean(-(t_discrim_fake_loss + t_discrim_real_loss))

        fnet_loss = gen_loss + warp_loss

        set_persistent_all(r_targets, r_inputs, loss_vgg, gen_out_first,
                           gen_out_last_rev, pp_loss, d_layer_loss,
                           content_loss, warp_loss, gen_loss,
                           t_adversarial_loss, t_discrim_loss,
                           t_discrim_real_loss, d_data.t_vel,
                           d_data.t_gen_output, s_gen_output, s_targets)

        Network = collections.namedtuple(
            'Network', 'content_loss,  warp_loss, fnet_loss, vgg_loss,'
            'gen_loss, pp_loss, sum_layer_loss,t_adversarial_loss,'
            't_discrim_loss,t_gen_output,t_discrim_real_loss')
        return Network(content_loss=content_loss,
                       warp_loss=warp_loss,
                       fnet_loss=fnet_loss,
                       vgg_loss=loss_vgg,
                       gen_loss=gen_loss,
                       pp_loss=pp_loss,
                       sum_layer_loss=d_layer_loss,
                       t_adversarial_loss=t_adversarial_loss,
                       t_discrim_loss=t_discrim_loss,
                       t_gen_output=d_data.t_gen_output,
                       t_discrim_real_loss=t_discrim_real_loss)

    gen_loss = content_loss
    fnet_loss = gen_loss + warp_loss
    set_persistent_all(content_loss, s_gen_output, warp_loss, gen_loss,
                       fnet_loss)

    Network = collections.namedtuple(
        'Network', 'content_loss, warp_loss, fnet_loss, gen_loss')
    return Network(
        content_loss=content_loss,
        warp_loss=warp_loss,
        fnet_loss=fnet_loss,
        gen_loss=gen_loss,
    )
Example #28
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-train-examples", type=int, default=1600)
    parser.add_argument("--num-valid-examples", type=int, default=100)
    parser.add_argument("--accum-grad", type=int, default=32)
    parser.add_argument("--max-iter", type=int, default=6400)
    parser.add_argument("--valid-interval", type=int, default=100)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)

    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare dataset
    tdataset = []
    for i in range(args.num_train_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        tdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    vdataset = []
    for i in range(args.num_valid_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        vdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    # prepare data iterator
    tdata = data_iterator(SimpleDataSource2(tdataset, shuffle=True), 1, False,
                          False, False)
    vdata = data_iterator(SimpleDataSource2(vdataset, shuffle=False), 1, False,
                          False, False)

    # prepare monitors
    monitor = M.Monitor("./degree")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)

    verror = M.MonitorSeries("Validation Error", monitor, interval=10)

    # prepare solver
    solver = S.Adam()

    # training loop
    for i in range(args.max_iter):
        l = 0
        for b in range(args.accum_grad):
            # read data
            V, E, degree = tdata.next()
            V = V[0][0]
            E = E[0][0]
            degree = degree[0][0]

            # predict
            output = predict(V, E)

            # initialize solver
            if i == 0 and b == 0:
                solver.set_parameters(nn.get_parameters())

            # calculate loss
            label = nn.Variable(degree.shape)
            label.data.data = degree
            label = F.reshape(label, (len(V), 1))
            loss = F.mean(F.squared_error(output, label))

            # training
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data

        solver.update()

        tloss.add(i, l / args.accum_grad)
        l = 0

        if i % args.valid_interval == 0:
            # validation
            # read data
            e = 0
            n = 0
            for b in range(vdata.size):
                V, E, degree = vdata.next()
                V = V[0][0]
                E = E[0][0]
                degree = degree[0][0]

                output = predict(V, E)

                label = nn.Variable(degree.shape)
                label.data.data = degree
                label = F.reshape(label, (len(V), 1))
                error = F.sum(F.less_scalar(F.abs(F.sub2(output, label)), 0.5))

                error.forward()

                e += error.data.data
                n += len(V)
            verror.add(i, e / n)
Example #29
def calculate_alpha(
    parameter_list,
    X,
    Y,
    Y_label,
    feature_valid,
    solver,
    output_valid,
    pred,
    loss,
    phi,
    l2,
):

    min_loss = 10000.0
    feature_valid.d = X
    output_valid.d = Y
    for epoch in range(args.epoch):
        phi_loss = 0

        loss.forward()
        solver.zero_grad()
        loss.backward()
        phi_loss = phi.d / len(X)
        temp_W = parameter_list
        grad_loss = F.add_n(
            *[F.mean(F.abs(p.grad)) for p in nn.get_parameters().values()])
        grad_norm = F.add_n(
            *[F.norm(p.grad) for p in nn.get_parameters().values()])

        if grad_loss.data < min_loss:
            if epoch == 0:
                init_grad = grad_loss.data
            min_loss = grad_loss.data
            best_W = temp_W
            if min_loss < init_grad / 200:
                print("stopping criteria reached in epoch :{}".format(epoch))
                break
        parameter_list = backtracking_line_search(grad_norm, X, Y, loss,
                                                  len(X), loss.d, l2)
        if epoch % 100 == 0:
            print("Epoch:{:4d}\tloss:{}\tphi_loss:{}\tl2(lmbd):{}\tgrad:{}".
                  format(epoch, loss.d, phi_loss, args.lmbd * l2.d,
                         grad_loss.data))

    for weight, param in zip(nn.get_parameters().values(), best_W):
        weight.data.copy_from(param.data)

    softmax_value = F.softmax(pred)
    softmax_value.forward()
    # derivative of softmax cross entropy
    weight_matrix = softmax_value.d - Y
    weight_matrix = np.divide(weight_matrix, (-2.0 * args.lmbd * len(Y)))

    np.save(os.path.join(data_dir, "weight_matrix.npy"), weight_matrix)

    # compute alpha
    alpha = []
    for ind, label in enumerate(Y_label.reshape(-1)):
        alpha.append(float(weight_matrix[ind, int(label)]))
    alpha = np.abs(np.array(alpha))
    np.save(os.path.join(data_dir, "alpha_vgg_nnabla_score.npy"), alpha)

    # calculate correlation
    w = np.matmul(X.T, weight_matrix)
    temp = np.matmul(X, w)
    softmax_value = F.softmax(nn.Variable.from_numpy_array(temp))
    softmax_value.forward()
    y_p = softmax_value.d

    print(
        "L1 difference between ground truth prediction and prediction by representer theorem decomposition"
    )
    print(np.mean(np.abs(Y - y_p)))

    from scipy.stats.stats import pearsonr

    print(
        "pearson correlation between ground truth  prediction and prediciton by representer theorem"
    )
    corr, _ = pearsonr(Y.reshape(-1), y_p.reshape(-1))
    print(corr)