Code Example #1
File: model.py Project: sony/ai-research-code
 def __call__(self, features):
     upsampled_inputs = [
         F.interpolate(x,
                       output_size=features[0].shape[2:],
                       mode='linear',
                       align_corners=False,
                       half_pixel=True) for x in features
     ]
     inputs = F.concatenate(*upsampled_inputs, axis=1)
     out = self.conv2d(inputs,
                       self.hparams['channels'],
                       kernel_size=1,
                       stride=1,
                       bias=False,
                       name='convs/0/conv')
     out = F.relu(self.batch_norm(out, name='convs/0/bn'))
     out = self.conv2d(out,
                       self.hparams['num_classes'],
                       kernel_size=1,
                       stride=1,
                       bias=True,
                       name='conv_seg')
     out = F.interpolate(out,
                         output_size=self.output_size,
                         mode='linear',
                         align_corners=False,
                         half_pixel=True)
     if self.test:
         return F.softmax(out, axis=1)
     return out
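As a side note, here is a minimal, self-contained sketch (not from the project above; shapes are illustrative) of what the list comprehension in this example relies on: F.interpolate with output_size brings feature maps of different resolutions to a common spatial size so they can be concatenated.

import numpy as np
import nnabla as nn
import nnabla.functions as F

# Two feature maps at different spatial resolutions.
x_hi = nn.Variable.from_numpy_array(
    np.random.randn(1, 8, 32, 32).astype(np.float32))
x_lo = nn.Variable.from_numpy_array(
    np.random.randn(1, 8, 16, 16).astype(np.float32))

# Resize the coarse map to the fine map's spatial size before concatenation.
y = F.interpolate(x_lo, output_size=x_hi.shape[2:], mode='linear',
                  align_corners=False, half_pixel=True)
y.forward()
print(y.shape)  # (1, 8, 32, 32)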
Code Example #2
File: nonlocal_net.py Project: sony/nnabla-examples
def layer5_1(x):
    pad5_1 = F.pad(x, (1, 1, 1, 1), 'reflect')
    conv5_1 = PF.convolution(pad5_1, 256, kernel=(3, 3), stride=(1, 1),
                             name='layer5_1.1')
    conv5_1 = F.instance_normalization(
        conv5_1, gamma=None, beta=None, channel_axis=1)
    conv5_1 = PF.prelu(conv5_1, name='layer5_1.3')
    up5_1 = F.interpolate(conv5_1, scale=(2, 2), mode='nearest',
                          align_corners=False)
    pad5_2 = F.pad(up5_1, (1, 1, 1, 1), 'reflect')
    conv5_2 = PF.convolution(pad5_2, 64, kernel=(3, 3), stride=(1, 1),
                             name='layer5_1.6')
    conv5_2 = F.instance_normalization(
        conv5_2, gamma=None, beta=None, channel_axis=1)
    conv5_2 = PF.prelu(conv5_2, name='layer5_1.8')
    up5_2 = F.interpolate(conv5_2, scale=(2, 2), mode='nearest',
                          align_corners=False)
    return up5_2
Code Example #3
File: models.py Project: sony/nnabla-examples
def rrdb_net(x, num_output_channel, num_rrdb_blocks, growth_channel=32):
    '''
    :param x: input image
    :param num_output_channel: number of output channels
    :param num_rrdb_blocks: number of RRDB (residual-in-residual dense) blocks
    :param growth_channel: growth channels (number of intermediate channels)
    :return: output of the final convolution (conv_last)
    '''
    fea = PF.convolution(x,
                         num_output_channel,
                         kernel=(3, 3),
                         stride=(1, 1),
                         pad=(1, 1),
                         name='conv_first')
    h = fea
    with nn.parameter_scope('RRDB_trunk'):
        for i in range(num_rrdb_blocks):
            with nn.parameter_scope('{}'.format(i)):
                h = rrdb(h, num_output_channel, growth_channel)

    trunk_conv = PF.convolution(h,
                                num_output_channel,
                                kernel=(3, 3),
                                stride=(1, 1),
                                pad=(1, 1),
                                name='trunk_conv')
    fea = fea + trunk_conv
    up_conv1 = F.leaky_relu(PF.convolution(F.interpolate(fea,
                                                         scale=(2, 2),
                                                         mode='nearest'),
                                           num_output_channel,
                                           kernel=(3, 3),
                                           stride=(1, 1),
                                           pad=(1, 1),
                                           name='upconv1'),
                            alpha=0.2)
    up_conv2 = F.leaky_relu(PF.convolution(F.interpolate(up_conv1,
                                                         scale=(2, 2),
                                                         mode='nearest'),
                                           num_output_channel,
                                           kernel=(3, 3),
                                           stride=(1, 1),
                                           pad=(1, 1),
                                           name='upconv2'),
                            alpha=0.2)
    hr_conv = F.leaky_relu(PF.convolution(up_conv2,
                                          num_output_channel,
                                          kernel=(3, 3),
                                          stride=(1, 1),
                                          pad=(1, 1),
                                          name='HRconv'),
                           alpha=0.2)
    conv_last = PF.convolution(hr_conv,
                               3,
                               kernel=(3, 3),
                               stride=(1, 1),
                               pad=(1, 1),
                               name='conv_last')
    return conv_last
Code Example #4
def interpolate_nn(image, frame, scale):
    '''
    Linear interpolation of Variable image and frame.
    Args:
        image (Variable): input image
        frame (Variable): input frame
        scale (tuple of int): resampling factor per spatial dimension
    Returns:
        linearly interpolated image and frame
    '''
    image = F.interpolate(image, scale)
    frame = F.interpolate(frame, scale)
    return image, frame
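A hedged usage sketch of the helper above (the shapes and the 2x factor are illustrative):

image = nn.Variable((1, 3, 64, 64))
frame = nn.Variable((1, 3, 64, 64))
image_up, frame_up = interpolate_nn(image, frame, scale=(2, 2))
# Both outputs are upsampled by 2x: (1, 3, 128, 128)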
Code Example #5
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        # output_size is the primary argument even if `scale` is specified.
        output_size = self.forward_func.info.args["output_size"]
        mode = self.forward_func.info.args["mode"]
        align_corners = self.forward_func.info.args["align_corners"]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Computation
        if prop_down[1]:
            g_dy_ = F.interpolate(
                g_dx0, output_size=output_size, mode=mode, align_corners=align_corners)
            if accum[1]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
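For context, a minimal sketch (my own illustration, assuming standard nnabla APIs) of the kind of graph whose second-order differentiation exercises a backward_impl like the one above:

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable((1, 1, 4, 4), need_grad=True)
x.d = np.random.randn(*x.shape)
y = F.interpolate(x, output_size=(8, 8), mode='linear', align_corners=False)
# nn.grad builds a backward graph; differentiating through that graph
# again invokes the double-backward of the interpolation's data-grad.
gx = nn.grad([y], [x])[0]
gx.forward()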
Code Example #6
File: train.py Project: wpfhtl/nnabla-examples
def get_model(args, test=False):
    """
    Create computation graph and variables.

    """

    image = nn.Variable(
        [args.batch_size, 3, args.image_height, args.image_width])
    label = nn.Variable(
        [args.batch_size, 1, args.image_height, args.image_width])
    mask = nn.Variable(
        [args.batch_size, 1, args.image_height, args.image_width])

    pred = model.deeplabv3plus_model(image,
                                     args.output_stride,
                                     args.num_class,
                                     test=test,
                                     fix_params=False)

    if pred.shape != label.shape:
        pred = F.interpolate(pred,
                             output_size=(label.shape[2], label.shape[3]),
                             mode='linear')

    loss = F.sum(
        F.softmax_cross_entropy(pred, label, axis=1) * mask) / F.sum(mask)
    Model = namedtuple('Model', ['image', 'label', 'mask', 'pred', 'loss'])
    return Model(image, label, mask, pred, loss)
Code Example #7
def Upsample(h, nmap_out, scope_name, scale=2):
    with nn.parameter_scope(scope_name):
        def sn_w(w): return PF.spectral_norm(w, dim=0)
        h = F.interpolate(h, scale=(scale, scale), mode="nearest")
        h = PF.convolution(h, nmap_out*2, (3, 3), pad=(1, 1),
                           apply_w=sn_w, with_bias=False, name="conv1")
        h = PF.batch_normalization(h)
        h = GLU(h)
    return h
Code Example #8
File: test_interpolate.py Project: sony/nnabla
def test_interpolate_nearest_double_backward(seed, inshape, outsize, scale,
                                             sdim_only, align_corners,
                                             half_pixel, half_pixel_for_nn,
                                             channel_last, ctx, func_name):
    if channel_last and func_name == "Interpolate":
        pytest.skip("Interpolate with channel_last is only supported in CUDA.")
    if sdim_only and channel_last:
        pytest.skip(
            "Interpolate for spatial dimension only data is only supported for channel_first option."
        )
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.interpolate import InterpolateDataGrad

    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [
        scale, outsize, 'nearest', align_corners, half_pixel,
        half_pixel_for_nn, channel_last
    ]
    # 2nd-order
    backward_function_tester(rng,
                             F.interpolate,
                             inputs,
                             func_args=func_args,
                             atol_f=1e-6,
                             atol_accum=1e-2,
                             dstep=1e-3,
                             ctx=ctx)
    # 3rd-order
    # F.interpolate accepts both scale and output_size, while
    # InterpolateDataGrad accepts only output_size, so output_size
    # is derived from scale below when needed.
    import nnabla as nn
    import math
    vinputs = [
        nn.Variable(inp.shape) if inp is not None else None for inp in inputs
    ]
    y = F.interpolate(*(vinputs + func_args))
    x = inputs[0]
    if scale:
        input_size = (x.shape[-len(scale) - 1:-1] if channel_last
                      else x.shape[-len(scale):])
        output_size = [
            int(math.floor(s * d)) for d, s in zip(input_size, scale)
        ]
    else:
        output_size = outsize
    df = InterpolateDataGrad(ctx, *([output_size] + func_args[2:]))
    df.xshape = x.shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng,
                             df,
                             ginputs,
                             func_args=[],
                             ctx=ctx,
                             atol_f=1e-6,
                             atol_accum=5e-2,
                             non_accum_check=True)
Code Example #9
def upsample(x, name, with_conv):
    with nn.parameter_scope(name):
        B, C, H, W = x.shape
        x = F.interpolate(x, scale=(2, 2), mode="nearest", align_corners=True)
        assert x.shape == (B, C, H * 2, W * 2)
        if with_conv:
            x = conv(x, C, "upsample_conv")
            assert x.shape == (B, C, H * 2, W * 2)
        return x
Code Example #10
def upsample(x, factor, training, left_shape=None):
    if len(x.shape) == 4:
        if training:
            h = F.interpolate(x,
                              scale=(factor, factor),
                              mode='linear',
                              align_corners=True)
        else:
            h = F.interpolate(x,
                              output_size=(left_shape[2] // 4,
                                           left_shape[3] // 4),
                              mode='linear',
                              align_corners=True)
    elif len(x.shape) == 5:
        planes = x.shape[1]
        kernel_size = 2 * factor - factor % 2
        stride = int(factor)
        pad = int(math.ceil((factor - 1) / 2.))
        scale_factor = (kernel_size + 1) // 2
        if kernel_size % 2 == 1:
            center = scale_factor - 1
        else:
            center = scale_factor - 0.5
        bilinear_kernel = np.zeros([kernel_size, kernel_size, kernel_size],
                                   dtype=np.float32)
        for i in range(kernel_size):
            for j in range(kernel_size):
                for d in range(kernel_size):
                    bilinear_kernel[
                        i, j, d] = (1 - abs(i - center) / scale_factor) * (
                            1 - abs(j - center) / scale_factor) * (
                                1 - abs(d - center) / scale_factor)
        w_filter = np.zeros([1, planes, kernel_size, kernel_size, kernel_size])
        for i in range(planes):
            w_filter[:, i, :, :, :] = bilinear_kernel
        h = PF.deconvolution(x,
                             planes,
                             kernel=(kernel_size, kernel_size, kernel_size),
                             pad=(pad, pad, pad),
                             stride=(stride, stride, stride),
                             w_init=w_filter,
                             fix_parameters=True,
                             group=planes)
    return h
Code Example #11
File: models.py Project: sony/nnabla-examples
def upsample(h, maps, up, test=False, name="convblock"):
    if up == "nearest":
        h = PF.convolution(h, maps, (3, 3), (1, 1), name=name)
        h = F.interpolate(h, scale=(2, 2), mode="nearest")
    elif up == "linear":
        h = PF.convolution(h, maps, (3, 3), (1, 1), name=name)
        h = F.interpolate(h, scale=(2, 2), mode="linear")
    elif up == "unpooling":
        h = PF.convolution(h, maps, (3, 3), (1, 1), name=name)
        h = F.unpooling(h, (2, 2))
    elif up == "deconv":
        h = PF.deconvolution(h, maps * 2, (2, 2), (0, 0), (2, 2), name=name)
    else:
        raise ValueError(
            'Set "up" option in ["nearest", "linear", "unpooling", "deconv"]')
    h = PF.batch_normalization(h, batch_stat=not test, name=name)
    h = F.relu(h)

    return h
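A hedged usage sketch of the block above (shapes and scope names are illustrative):

x = nn.Variable((4, 64, 16, 16))
# 3x3 convolution, then 2x nearest-neighbor upsampling, then BN + ReLU.
h = upsample(x, maps=32, up="nearest", name="up1")   # -> (4, 32, 32, 32)
# Learned upsampling via transposed convolution instead:
h2 = upsample(x, maps=32, up="deconv", name="up2")   # -> (4, 64, 32, 32)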
Code Example #12
 def up_block(input, output_channels=64, stride=1, scope='up_block'):
     with nn.parameter_scope(scope):
         net = conv2d(input, output_channels, (3, 3),
                      (stride, stride), name='conv_1')
         net = F.leaky_relu(net, 0.2)
         net = conv2d(net, output_channels, (3, 3),
                      (stride, stride), name='conv_2')
         net = F.leaky_relu(net, 0.2)
         net = F.interpolate(net, scale=(2, 2), channel_last=True)
     return net
Code Example #13
def deform_input(inp, deformation):
    _, h_old, w_old, _ = deformation.shape
    _, _, h, w = inp.shape

    if h_old != h or w_old != w:
        deformation = F.transpose(deformation, (0, 3, 1, 2))
        deformation = F.interpolate(deformation, output_size=(
            h, w), mode="linear", align_corners=False, half_pixel=True)
        deformation = F.transpose(deformation, (0, 2, 3, 1))

    return F.warp_by_grid(inp, deformation, align_corners=True)
Code Example #14
File: colornet.py Project: sony/nnabla-examples
def conv_up(x, out_ch, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name=None):
    upsample = F.interpolate(x,
                             scale=(2, 2),
                             mode='nearest',
                             align_corners=False)
    conv = PF.convolution(upsample,
                          out_ch,
                          kernel=kernel,
                          pad=pad,
                          stride=stride,
                          name=name)
    return conv
Code Example #15
File: interpolate.py Project: donproc/nnabla
def interpolate_data_grad_backward(inputs, output_size, mode, align_corners=True,
                                   half_pixel=False, half_pixel_for_nn=False, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    gdy = F.interpolate(gdx, None, output_size, mode, align_corners,
                        half_pixel, half_pixel_for_nn, channel_last)
    return gdy
Code Example #16
File: loss.py Project: sony/nnabla-examples
def loss_dis_real(logits, rec_imgs, part, img, lmd=1.0):
    # loss = 0.0

    # Hinge loss (following the official implementation)
    loss = F.mean(F.relu(0.2*F.rand(shape=logits.shape) + 0.8 - logits))

    # Reconstruction loss for rec_img_big (reconstructed from 8x8 features of the original image)
    # Reconstruction loss for rec_img_small (reconstructed from 8x8 features of the resized image)
    # Reconstruction loss for rec_img_part (reconstructed from a part of 16x16 features of the original image)
    if lmd > 0.0:
        # Ground-truth
        img_128 = F.interpolate(img, output_size=(128, 128))
        img_256 = F.interpolate(img, output_size=(256, 256))

        img_half = F.where(F.greater_scalar(
            part[0], 0.5), img_256[:, :, :128, :], img_256[:, :, 128:, :])
        img_part = F.where(F.greater_scalar(
            part[1], 0.5), img_half[:, :, :, :128], img_half[:, :, :, 128:])

        # Integrated perceptual loss
        loss = loss + lmd * \
            reconstruction_loss_lpips(rec_imgs, [img_128, img_part])

    return loss
Code Example #17
def get_t_d(conf, r_inputs, d_data):
    """
    Create real and fake temporal discriminators
    """
    # crop out the unstable border for the temporal discriminator; see the TecoGAN supplemental paper for details
    crop_size_dt = int(conf.train.crop_size * 4 * conf.gan.crop_dt)
    offset_dt = (conf.train.crop_size * 4 - crop_size_dt) // 2
    crop_size_dt = conf.train.crop_size * 4 - offset_dt * 2
    paddings = (0, 0, offset_dt, offset_dt, offset_dt, offset_dt, 0, 0)

    with nn.parameter_scope("discriminator"):
        real_warp = warp_by_flow(d_data.t_targets, d_data.t_vel)
        real_warp = space_to_depth_disc(real_warp, d_data.t_batch)

        # equivalent to tf.image.crop_to_bounding_box
        real_warp = real_warp[:, offset_dt:offset_dt + crop_size_dt,
                              offset_dt:offset_dt + crop_size_dt, :]
        real_warp = F.pad(real_warp, paddings)
        before_warp = space_to_depth_disc(d_data.t_targets, d_data.t_batch)
        t_input = space_to_depth_disc(r_inputs[:, :d_data.t_size, :, :, :],
                                      d_data.t_batch)
        # resizing using bilinear interpolation
        input_hi = F.interpolate(t_input,
                                 scale=(4, 4),
                                 mode='linear',
                                 channel_last=True)
        real_warp = F.concatenate(before_warp, real_warp, input_hi)

        tdiscrim_real_output, real_layers = discriminator(real_warp)

        fake_warp = warp_by_flow(d_data.t_gen_output, d_data.t_vel)
        fake_warp = space_to_depth_disc(fake_warp, d_data.t_batch)
        fake_warp = fake_warp[:, offset_dt:offset_dt + crop_size_dt,
                              offset_dt:offset_dt + crop_size_dt, :]
        fake_warp = F.pad(fake_warp, paddings)
        before_warp = space_to_depth_disc(d_data.t_gen_output,
                                          d_data.t_batch,
                                          inplace=False)
        fake_warp = F.concatenate(before_warp, fake_warp, input_hi)
        tdiscrim_fake_output, fake_layers = discriminator(fake_warp)

    temporal_disc = collections.namedtuple(
        'temporal_disc', 'tdiscrim_real_output,'
        'real_layers, tdiscrim_fake_output, fake_layers')
    return temporal_disc(tdiscrim_real_output=tdiscrim_real_output,
                         real_layers=real_layers,
                         tdiscrim_fake_output=tdiscrim_fake_output,
                         fake_layers=fake_layers)
Code Example #18
def spade(x, m, hidden_dim=128, kernel=(3, 3), norm_type="in"):
    """
    Spatially-Adaptive Normalization proposed in Semantic Image Synthesis with Spatially-Adaptive Normalization (https://arxiv.org/pdf/1903.07291.pdf).


    Args:
        x (nn.Variable): Input variable for spade layer.
        m (nn.Variable):
            Spatial condition variable like object_id mask segmentation.
            This is for generating the adaptive scale (gamma) and adaptive bias (beta) applied after normalization.
        hidden_dim (int): Hidden dims for first convolution applied to m.
        kernel (list of int): Kernel shapes for convolutions.
        norm_type (str) : A type of normalization. ["in", "bn"] are supported now.
    """
    # x: (N, Cx, H, W), m: (N, Cm, H, W)
    assert len(x.shape) == 4 and len(m.shape) == 4

    pad = tuple(i // 2 for i in kernel)
    c_dim = x.shape[1]
    conv_args = dict(kernel=kernel, pad=pad)
    with ps("spatial_adaptive_normalization"):
        normalized = _normalize(x, norm_type)

        m = F.interpolate(m, output_size=x.shape[2:], mode="nearest")

        with ps("shared"):
            actv = F.relu(
                PF.convolution(m,
                               hidden_dim,
                               w_init=w_init(m, hidden_dim),
                               **conv_args))

        with ps("gamma"):
            gamma = PF.convolution(actv,
                                   c_dim,
                                   w_init=w_init(actv, c_dim),
                                   **conv_args)

        with ps("beta"):
            beta = PF.convolution(actv,
                                  c_dim,
                                  w_init=w_init(actv, c_dim),
                                  **conv_args)

    return normalized * gamma + beta
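A hedged usage sketch (assuming the module-level helpers ps, _normalize, and w_init referenced above are in scope):

# x: generator features; m: semantic mask at the input resolution.
x = nn.Variable((1, 64, 32, 32))
m = nn.Variable((1, 3, 256, 256))
y = spade(x, m)  # m is resized internally to x's spatial size (32, 32)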
Code Example #19
def deeplabv3plus_model(x,
                        output_stride,
                        num_classes,
                        test=False,
                        fix_params=False):
    '''Encoder 
    '''
    # Get decoder endpoints from backbone
    endpoints = xception.xception_65(x, test=test, fix_params=fix_params)
    low_level_features = endpoints['Decoder End Point 1']

    encoder_output = atrous_spatial_pyramid_pooling(
        endpoints['Decoder End Point 2'],
        output_stride,
        test=test,
        fix_params=fix_params)

    with nn.parameter_scope("concat_projection"):
        encoder_output = PF.convolution(encoder_output,
                                        256, (1, 1),
                                        with_bias=False,
                                        fix_parameters=fix_params)
        encoder_output = F.relu(
            PF.batch_normalization(encoder_output,
                                   batch_stat=not test,
                                   fix_parameters=fix_params))
    '''Decoder 
    '''
    with nn.parameter_scope("decoder"):
        with nn.parameter_scope("upsample1"):
            upsampled = F.interpolate(
                encoder_output,
                output_size=(low_level_features.shape[2],
                             low_level_features.shape[2]),  # assumes square maps (H == W)
                mode='linear')

        h = decoder(low_level_features,
                    upsampled,
                    num_classes,
                    test=test,
                    fix_params=fix_params)

    return h
Code Example #20
    def hg_module(n, x):
        with nn.parameter_scope(f"{n - 1}.0.0"):
            up1 = ops[n - 1][0](x)
        low1 = F.max_pooling(x, kernel=(2, 2), stride=(2, 2))
        with nn.parameter_scope(f"{n - 1}.1.0"):
            low1 = ops[n - 1][1](low1)

        if n > 1:
            low2 = hg_module(n - 1, low1)
        else:
            with nn.parameter_scope(f"{n - 1}.3.0"):
                low2 = ops[n - 1][3](low1)
        with nn.parameter_scope(f"{n - 1}.2.0"):
            low3 = ops[n - 1][2](low2)

        up2 = F.interpolate(low3, scale=(2, 2), mode="nearest")

        out = up1 + up2
        return out
Code Example #21
File: modules.py Project: saccadic/nnabla-examples
def upblock(x,
            out_features,
            kernel_size=3,
            padding=1,
            groups=1,
            test=False,
            comm=None):
    if comm:
        batchnorm = functools.partial(PF.sync_batch_normalization,
                                      comm=comm,
                                      group='world',
                                      axes=[1],
                                      decay_rate=0.9,
                                      eps=1e-05,
                                      batch_stat=not test)
    else:
        # 1 GPU
        batchnorm = functools.partial(PF.batch_normalization,
                                      axes=[1],
                                      decay_rate=0.9,
                                      eps=1e-05,
                                      batch_stat=not test)

    inmaps, outmaps = x.shape[1], out_features
    k_w = I.calc_normal_std_he_forward(
        inmaps, outmaps, kernel=(kernel_size, kernel_size)) / np.sqrt(2.)
    k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
    w_init = I.UniformInitializer((-k_w, k_w))
    b_init = I.UniformInitializer((-k_b, k_b))

    out = F.interpolate(x, scale=(2, 2), mode="nearest")
    with nn.parameter_scope("upblock"):
        out = PF.convolution(out,
                             outmaps=out_features,
                             kernel=(kernel_size, kernel_size),
                             pad=(padding, padding),
                             group=groups,
                             w_init=w_init,
                             b_init=b_init)
        out = batchnorm(out)
    out = F.relu(out, inplace=True)
    return out
Code Example #22
File: lpips.py Project: sony/nnabla-examples
    def __call__(self, img0, img1, normalize=False, mean_batch=False):
        """
            Args:
               img0, img1 (Variable): Variables containing images; batches of N images can be used.
               normalize (bool): if True, assumes inputs are in [0., 1.] and scales them to [-1., +1.];
                                 if False, assumes inputs are already in [-1., +1.].
        """
        assert img0.shape == img1.shape, "img0 and img1 have different shape."
        assert isinstance(img0, nn.Variable), "img0 is not Variable."
        assert isinstance(img1, nn.Variable), "img1 is not Variable."

        if normalize:
            # scales the input between [-1., +1.]
            img0 = 2 * img0 - 1
            img1 = 2 * img1 - 1

        if self.apply_scale:
            img0 = (img0 - self._shift) / self._scale
            img1 = (img1 - self._shift) / self._scale

        dists = compute_each_feat_dist(img0,
                                       img1,
                                       feat_extractor=self.feat_extractor)

        if self.spatial:
            # note that this upsampling method is different from the original
            # LPIPS implementation, which uses torch.nn.Upsample(mode="bilinear").
            dists = [
                F.interpolate(dist * (1. * img0.shape[2] / dist.shape[2]),
                              output_size=img0.shape[2:]) for dist in dists
            ]
        else:
            dists = [
                F.mean(dist, axis=[2, 3], keepdims=True) for dist in dists
            ]
        # returns N scores ((N, 1, 1, 1))
        lpips_val = F.sum(F.stack(*dists), axis=0)

        if mean_batch:
            lpips_val = F.mean(lpips_val, axis=0)

        return lpips_val
Code Example #23
File: modules.py Project: saccadic/nnabla-examples
def anti_alias_interpolate(input, channels, scale):
    # no trainable parameters exist.
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    ka = kernel_size // 2
    if kernel_size % 2 == 0:
        kb = ka - 1
    else:
        kb = ka

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]
    kernel = 1

    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa,
                        (1, kernel_size[1])), F.tile(ya, (kernel_size[0], 1)))

    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))

    kernel = kernel / F.sum(kernel, keepdims=True)
    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))
    # if a pre-computed kernel is used, it need not be computed here.

    out = F.pad(input, (ka, kb, ka, kb))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")

    return out
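A hedged usage sketch: for scale < 1 the Gaussian blur above acts as an anti-aliasing filter before the nearest-neighbor resampling (shapes illustrative).

x = nn.Variable((1, 3, 256, 256))
# sigma = (1 / 0.25 - 1) / 2 = 1.5, kernel size 13, output 64x64.
y = anti_alias_interpolate(x, channels=3, scale=0.25)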
Code Example #24
def hour_glass(inp, depth, num_features):
    # Upper branch
    up1 = inp
    with nn.parameter_scope('b1_' + str(depth)):
        up1 = conv_block(up1, num_features, num_features)

    # Lower branch
    low1 = F.average_pooling(inp, (2, 2), stride=(2, 2))
    with nn.parameter_scope('b2_' + str(depth)):
        low1 = conv_block(low1, num_features, num_features)

    if depth > 1:
        low2 = hour_glass(low1, depth - 1, num_features)
    else:
        low2 = low1
        with nn.parameter_scope('b2_plus_' + str(depth)):
            low2 = conv_block(low2, num_features, num_features)

    low3 = low2
    with nn.parameter_scope('b3_' + str(depth)):
        low3 = conv_block(low3, num_features, num_features)
    up2 = F.interpolate(low3, scale=(2, 2), mode='nearest')
    return up1 + up2
Code Example #25
    def __init__(self, nf, image_shape, ext_upsamples=0):
        ext_upsamples = int(ext_upsamples)
        assert isinstance(ext_upsamples, int) and 0 <= ext_upsamples <= 2,\
            "ext_upsamples must be in the range of [0, 2]."

        self.nf = nf
        self.image_shape = image_shape
        self.num_upsample = 5 + ext_upsamples

        self.head_0 = SpadeResidualBlock(16 * nf)

        self.G_middle_0 = SpadeResidualBlock(16 * nf)
        self.G_middle_1 = SpadeResidualBlock(16 * nf)

        self.up_0 = SpadeResidualBlock(8 * nf)
        self.up_1 = SpadeResidualBlock(4 * nf)
        self.up_2 = SpadeResidualBlock(2 * nf)
        self.up_3 = SpadeResidualBlock(nf)

        if self.num_upsample > 6:
            self.up_4 = SpadeResidualBlock(nf // 2)

        self.up = lambda x: F.interpolate(x, scale=(2, 2), mode="nearest")
Code Example #26
def decoder(x, upsampled, num_classes, test=False, fix_params=False):

    # Project low-level features
    with nn.parameter_scope("feature_projection0"):
        h = PF.convolution(x,
                           48, (1, 1),
                           with_bias=False,
                           fix_parameters=fix_params)
        h = F.relu(
            PF.batch_normalization(h,
                                   batch_stat=not test,
                                   fix_parameters=fix_params))

    h = F.concatenate(upsampled, h, axis=1)

    for i in range(2):
        with nn.parameter_scope("decoder_conv" + str(i)):
            h = xception.separable_conv_with_bn(h,
                                                256,
                                                last_block=True,
                                                eps=1e-05,
                                                out=True,
                                                test=test,
                                                fix_params=fix_params)

    with nn.parameter_scope("logits/affine"):
        h = PF.convolution(h,
                           num_classes, (1, 1),
                           with_bias=True,
                           fix_parameters=fix_params)  # no activation

    with nn.parameter_scope("upsample2"):
        h = F.interpolate(h,
                          output_size=(h.shape[2] * 4 - 3, h.shape[2] * 4 - 3),
                          mode='linear')

    return h
Code Example #27
 def graph(x0):
     # F.swish -> F.interpolate
     x1 = F.swish(x0)
     x1.apply(recompute=True)
     x2 = F.interpolate(x1, scale=(2,))
     return x2
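For reference, a hedged sketch (assuming standard nnabla recompute semantics) of how the flag set above behaves during training:

import nnabla as nn
import nnabla.functions as F

x0 = nn.Variable((2, 3, 8), need_grad=True)
x2 = graph(x0)
# With recompute=True, x1's buffer may be cleared after the forward pass
# and recomputed from x0 during backward, trading compute for memory.
x2.forward(clear_no_need_grad=True)
x2.backward()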
Code Example #28
    def visualize(self, driving, source, out):
        images = []

        # Source image with keypoints
        if isinstance(source, nn.Variable):
            source = source.d
        kp_source = out['kp_source']['value'].d
        source = np.transpose(source, [0, 2, 3, 1])
        images.append((source, kp_source))

        # Equivariance visualization; not used for animation (eval)
        if 'transformed_frame' in out:
            transformed = out['transformed_frame'].d
            transformed = np.transpose(transformed, [0, 2, 3, 1])
            transformed_kp = out['transformed_kp']['value'].d
            images.append((transformed, transformed_kp))

        # Driving image with keypoints
        kp_driving = out['kp_driving']['value'].d
        if isinstance(driving, nn.Variable):
            driving = driving.d
        driving = np.transpose(driving, [0, 2, 3, 1])
        images.append((driving, kp_driving))

        # Deformed image
        if 'deformed' in out:
            deformed = out['deformed'].d
            deformed = np.transpose(deformed, [0, 2, 3, 1])
            images.append(deformed)

        # Result with and without keypoints
        prediction = out['prediction'].d
        prediction = np.transpose(prediction, [0, 2, 3, 1])
        if 'kp_norm' in out:
            kp_norm = out['kp_norm']['value'].d
            images.append((prediction, kp_norm))
        images.append(prediction)

        # Occlusion map
        if 'occlusion_map' in out:
            with nn.auto_forward():
                occlusion_map = F.tile(out['occlusion_map'], (1, 3, 1, 1))
                occlusion_map = F.interpolate(
                    occlusion_map, output_size=source.shape[1:3], mode='nearest')
            occlusion_map = np.transpose(occlusion_map.d, [0, 2, 3, 1])
            images.append(occlusion_map)

        # Deformed images according to each individual transform
        if 'sparse_deformed' in out:
            full_mask = []
            for i in range(out['sparse_deformed'].shape[1]):
                with nn.auto_forward():
                    image = out['sparse_deformed'][:, i]
                    image = F.interpolate(
                        image, output_size=source.shape[1:3], mode='nearest')
                    mask = F.tile(out['mask'][:, i:(i + 1)], (1, 3, 1, 1))
                    mask = F.interpolate(
                        mask, output_size=source.shape[1:3], mode='nearest')
                image = np.transpose(image.d, (0, 2, 3, 1))
                mask = np.transpose(mask.d, (0, 2, 3, 1))

                if i != 0:
                    color = np.array(self.colormap(
                        (i - 1) / (out['sparse_deformed'].shape[1] - 1)))[:3]
                else:
                    color = np.array((0, 0, 0))

                color = color.reshape((1, 1, 1, 3))

                images.append(image)
                if i != 0:
                    images.append(mask * color)
                else:
                    images.append(mask)

                full_mask.append(mask * color)

            images.append(sum(full_mask))

        image = self.create_image_grid(*images)
        image = (255 * image).astype(np.uint8)
        return image
Code Example #29
File: models.py Project: sony/nnabla-examples
def pcd_align(fea1, fea2):
    """
    Alignment module using Pyramid, Cascading and Deformable convolution
    with 3 pyramid levels[L1, L2, L3].
    """

    num_filters = 64
    deformable_groups = 8
    kernel_sz, stride_ln, pad_ln = 3, 1, 1

    def deform_conv(fea, offset_input, name):
        """
        deformable convolution block
        """

        with nn.parameter_scope(name):
            channels_ = deformable_groups * 3 * kernel_sz * kernel_sz
            conv_offset_mask = conv2d(offset_input, channels_, kernel_sz, stride_ln, pad_ln,
                                      bias=True,
                                      name='conv_offset_mask')
            channels = channels_ // 3  # integer division; channels_ = groups * 3 * k * k
            offset = conv_offset_mask[:, :2 * channels, :, :]
            mask = F.sigmoid(
                conv_offset_mask[:, 2 * channels:3 * channels, :, :])
            deform_conv = PF.deformable_convolution(
                fea, num_filters, (kernel_sz, kernel_sz), offset, mask,
                deformable_group=deformable_groups,
                stride=(stride_ln, stride_ln), pad=(pad_ln, pad_ln),
                dilation=(1, 1), with_bias=True)
        return deform_conv

    y = []
    with nn.parameter_scope('pcd_align'):
        # fea1
        # L3: level 3, 1/4 spatial size
        l3_offset = F.concatenate(fea1[2], fea2[2], axis=1)
        l3_offset = F.leaky_relu(
            conv2d(l3_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l3_offset_conv1_1'))
        l3_offset = F.leaky_relu(
            conv2d(l3_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l3_offset_conv2_1'))
        l3_fea = F.leaky_relu(deform_conv(
            fea1[2], l3_offset, name='l3_dcnpack_1'))

        # L2: level 2, 1/2 spatial size
        l2_offset = F.concatenate(fea1[1], fea2[1], axis=1)
        l2_offset = F.leaky_relu(
            conv2d(l2_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv1_1'))
        l3_offset = F.interpolate(l3_offset, scale=(2, 2), mode='linear',
                                  align_corners=False, half_pixel=True)
        l2_offset = F.leaky_relu(
            conv2d(F.concatenate(l2_offset, l3_offset * 2, axis=1), num_filters, kernel_sz,
                   stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv2_1'))
        l2_offset = F.leaky_relu(
            conv2d(l2_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv3_1'))
        l2_fea = deform_conv(fea1[1], l2_offset, name='l2_dcnpack_1')
        l3_fea = F.interpolate(l3_fea, scale=(2, 2), mode='linear',
                               align_corners=False, half_pixel=True)
        l2_fea = F.leaky_relu(
            conv2d(F.concatenate(l2_fea, l3_fea, axis=1), num_filters, kernel_sz, stride_ln, pad_ln,
                   bias=True, name='l2_fea_conv_1'))

        # L1: level 1, original spatial size
        l1_offset = F.concatenate(fea1[0], fea2[0], axis=1)
        l1_offset = F.leaky_relu(
            conv2d(l1_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv1_1'))
        l2_offset = F.interpolate(l2_offset, scale=(2, 2), mode='linear',
                                  align_corners=False, half_pixel=True)
        l1_offset = F.leaky_relu(
            conv2d(F.concatenate(l1_offset, l2_offset * 2, axis=1), num_filters, kernel_sz,
                   stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv2_1'))
        l1_offset = F.leaky_relu(
            conv2d(l1_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv3_1'))
        l1_fea = deform_conv(fea1[0], l1_offset, name='l1_dcnpack_1')
        l2_fea = F.interpolate(l2_fea, scale=(2, 2), mode='linear',
                               align_corners=False, half_pixel=True)
        l1_fea = conv2d(F.concatenate(l1_fea, l2_fea, axis=1), num_filters, kernel_sz, stride_ln,
                        pad_ln, bias=True,
                        name='l1_fea_conv_1')
        y.append(l1_fea)

        # fea2
        # L3: level 3, 1/4 spatial size
        l3_offset = F.concatenate(fea2[2], fea1[2], axis=1)
        l3_offset = F.leaky_relu(
            conv2d(l3_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l3_offset_conv1_2'))
        l3_offset = F.leaky_relu(
            conv2d(l3_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l3_offset_conv2_2'))
        l3_fea = F.leaky_relu(deform_conv(
            fea2[2], l3_offset, name='l3_dcnpack_2'))

        # L2: level 2, 1/2 spatial size
        l2_offset = F.concatenate(fea2[1], fea1[1], axis=1)
        l2_offset = F.leaky_relu(
            conv2d(l2_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv1_2'))
        l3_offset = F.interpolate(l3_offset, scale=(2, 2), mode='linear',
                                  align_corners=False, half_pixel=True)
        l2_offset = F.leaky_relu(
            conv2d(F.concatenate(l2_offset, l3_offset * 2, axis=1), num_filters, kernel_sz,
                   stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv2_2'))
        l2_offset = F.leaky_relu(
            conv2d(l2_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l2_offset_conv3_2'))
        l2_fea = deform_conv(fea2[1], l2_offset, name='l2_dcnpack_2')
        l3_fea = F.interpolate(l3_fea, scale=(2, 2), mode='linear',
                               align_corners=False, half_pixel=True)
        l2_fea = F.leaky_relu(
            conv2d(F.concatenate(l2_fea, l3_fea, axis=1), num_filters, kernel_sz, stride_ln, pad_ln,
                   bias=True, name='l2_fea_conv_2'))

        # L1: level 1, original spatial size
        l1_offset = F.concatenate(fea2[0], fea1[0], axis=1)
        l1_offset = F.leaky_relu(
            conv2d(l1_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv1_2'))
        l2_offset = F.interpolate(l2_offset, scale=(2, 2), mode='linear',
                                  align_corners=False, half_pixel=True)
        l1_offset = F.leaky_relu(
            conv2d(F.concatenate(l1_offset, l2_offset * 2, axis=1), num_filters, kernel_sz,
                   stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv2_2'))
        l1_offset = F.leaky_relu(
            conv2d(l1_offset, num_filters, kernel_sz, stride_ln, pad_ln, bias=True,
                   name='l1_offset_conv3_2'))
        l1_fea = deform_conv(fea2[0], l1_offset, name='l1_dcnpack_2')
        l2_fea = F.interpolate(l2_fea, scale=(2, 2), mode='linear',
                               align_corners=False, half_pixel=True)
        l1_fea = conv2d(F.concatenate(l1_fea, l2_fea, axis=1), num_filters, kernel_sz, stride_ln,
                        pad_ln, bias=True,
                        name='l1_fea_conv_2')
        y.append(l1_fea)

    y = F.concatenate(*y, axis=1)
    return y
Code Example #30
File: discriminator.py Project: sony/nnabla-examples
def Discriminator(img, label="real", scope_name="Discriminator", ndf=64):
    with nn.parameter_scope(scope_name):
        if type(img) is not list:
            img_small = F.interpolate(img, output_size=(128, 128))
        else:
            img_small = img[1]
            img = img[0]

        def sn_w(w):
            return PF.spectral_norm(w, dim=0)

        # InitLayer: -> 256x256
        with nn.parameter_scope("init"):
            h = img
            if img.shape[2] == 1024:
                h = PF.convolution(h,
                                   ndf // 8, (4, 4),
                                   stride=(2, 2),
                                   pad=(1, 1),
                                   apply_w=sn_w,
                                   with_bias=False,
                                   name="conv1")
                h = F.leaky_relu(h, 0.2)
                h = PF.convolution(h,
                                   ndf // 4, (4, 4),
                                   stride=(2, 2),
                                   pad=(1, 1),
                                   apply_w=sn_w,
                                   with_bias=False,
                                   name="conv2")
                h = PF.batch_normalization(h)
                h = F.leaky_relu(h, 0.2)
            elif img.shape[2] == 512:
                h = PF.convolution(h,
                                   ndf // 4, (4, 4),
                                   stride=(2, 2),
                                   pad=(1, 1),
                                   apply_w=sn_w,
                                   with_bias=False,
                                   name="conv2")
                h = F.leaky_relu(h, 0.2)
            else:
                h = PF.convolution(h,
                                   ndf // 4, (3, 3),
                                   pad=(1, 1),
                                   apply_w=sn_w,
                                   with_bias=False,
                                   name="conv3")
                h = F.leaky_relu(h, 0.2)

        # Calc base features
        f_256 = h
        f_128 = DownsampleComp(f_256, ndf // 2, "down256->128")
        f_64 = DownsampleComp(f_128, ndf * 1, "down128->64")
        f_32 = DownsampleComp(f_64, ndf * 2, "down64->32")

        # Apply SLE
        f_32 = SLE(f_32, f_256, "sle256->32")
        f_16 = DownsampleComp(f_32, ndf * 4, "down32->16")
        f_16 = SLE(f_16, f_128, "sle128->16")
        f_8 = DownsampleComp(f_16, ndf * 16, "down16->8")
        f_8 = SLE(f_8, f_64, "sle64->8")

        # Conv + BN + LeakyReLU + Conv -> logits (5x5)
        with nn.parameter_scope("last"):
            h = PF.convolution(f_8,
                               ndf * 16, (1, 1),
                               apply_w=sn_w,
                               with_bias=False,
                               name="conv1")
            h = PF.batch_normalization(h)
            h = F.leaky_relu(h, 0.2)
            logit_large = PF.convolution(h,
                                         1, (4, 4),
                                         apply_w=sn_w,
                                         with_bias=False,
                                         name="conv2")

        # Another path: "down_from_small" in the official code
        with nn.parameter_scope("down_from_small"):
            h_s = PF.convolution(img_small,
                                 ndf // 2, (4, 4),
                                 stride=(2, 2),
                                 pad=(1, 1),
                                 apply_w=sn_w,
                                 with_bias=False,
                                 name="conv1")
            h_s = F.leaky_relu(h_s, 0.2)
            h_s = Downsample(h_s, ndf * 1, "dfs64->32")
            h_s = Downsample(h_s, ndf * 2, "dfs32->16")
            h_s = Downsample(h_s, ndf * 4, "dfs16->8")
            fea_dec_small = h_s
            logit_small = PF.convolution(h_s,
                                         1, (4, 4),
                                         apply_w=sn_w,
                                         with_bias=False,
                                         name="conv2")

        # Concatenate logits
        logits = F.concatenate(logit_large, logit_small, axis=1)

        # Reconstruct images
        rec_img_big = SimpleDecoder(f_8, "dec_big")
        rec_img_small = SimpleDecoder(fea_dec_small, "dec_small")
        part_ax2 = F.rand(shape=(img.shape[0], ))
        part_ax3 = F.rand(shape=(img.shape[0], ))
        f_16_ax2 = F.where(F.greater_scalar(part_ax2, 0.5), f_16[:, :, :8, :],
                           f_16[:, :, 8:, :])
        f_16_part = F.where(F.greater_scalar(part_ax3, 0.5),
                            f_16_ax2[:, :, :, :8], f_16_ax2[:, :, :, 8:])
        rec_img_part = SimpleDecoder(f_16_part, "dec_part")

    if label == "real":
        return logits, [rec_img_big, rec_img_small,
                        rec_img_part], [part_ax2, part_ax3]
    else:
        return logits