def get_tecogan_inputs(r_inputs, r_targets):
    """
    Generate and return the ping-pong sequence (forward and backward) from given inputs and targets
    """
    r_inputs = F.concatenate(r_inputs, r_inputs[:, -2::-1, :, :, :], axis=1)
    r_targets = F.concatenate(r_targets, r_targets[:, -2::-1, :, :, :], axis=1)
    return r_inputs, r_targets
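
A minimal shape-check sketch (sizes are hypothetical; assumes the usual nnabla imports these snippets rely on, i.e. nnabla as nn and nnabla.functions as F): the ping-pong trick appends the time-reversed frames, excluding the last one, so a clip of T frames becomes 2*T - 1 frames.

import nnabla as nn

r_inputs = nn.Variable((4, 10, 32, 32, 3))     # (batch, time, H, W, C)
r_targets = nn.Variable((4, 10, 128, 128, 3))  # HR targets, e.g. for 4x SR
pp_in, pp_tgt = get_tecogan_inputs(r_inputs, r_targets)
print(pp_in.shape)   # (4, 19, 32, 32, 3): 10 forward + 9 mirrored frames
print(pp_tgt.shape)  # (4, 19, 128, 128, 3)
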
def make_symmetric_matrix(_x):
    """
    Input:  _x : nn.Variable, shape (batch_size, *, *, *)
    Output: j_vector : nn.Variable, shape (batch_size, batch_size - 1, *, *, *)
    """

    batch_size = _x.shape[0]
    var_list = F.split(_x)
    concat_list = []
    # --- split & gather components ---
    for i in range(batch_size):
        tmp_list = []
        for j in range(batch_size):
            if i != j:
                tmp_list.append(
                    F.reshape(var_list[j], (1,) + var_list[j].shape))
        if len(tmp_list) > 1:
            concat_var = F.concatenate(*tmp_list, axis=0)
        else:
            concat_var = tmp_list[0]
        concat_list.append(
            F.reshape(concat_var, (1,) + concat_var.shape))
    # --- concatenate ---
    j_vector = F.concatenate(*concat_list, axis=0)
    return j_vector
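
A small shape sketch (hypothetical sizes): for each sample i, the output gathers every other sample j != i along a new axis, so a batch of B becomes (B, B - 1, ...).

import nnabla as nn

x = nn.Variable((4, 3, 8, 8))
j_vector = make_symmetric_matrix(x)
print(j_vector.shape)  # (4, 3, 3, 8, 8): (batch_size, batch_size - 1) + x.shape[1:]
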
Example #3
def compute_sample_points_for_variable_depth(ray_origins,
                                             ray_directions,
                                             near_plane,
                                             far_plane,
                                             num_samples,
                                             randomize=False):

    # num_samples evenly spaced depth steps in [0, 1]
    depth_steps = F.arange(0, 1 + 1 / num_samples, 1 / (num_samples - 1))
    depth_steps = F.broadcast(depth_steps[None, :],
                              (far_plane.shape[0], depth_steps.shape[0]))
    depth_values = near_plane[:, None] * \
        (1-depth_steps) + far_plane[:, None] * depth_steps

    if randomize:
        depth_vals_mid = 0.5 * (depth_values[:, :-1] + depth_values[:, 1:])
        # get intervals between samples
        upper = F.concatenate(depth_vals_mid, depth_values[:, -1:], axis=-1)
        lower = F.concatenate(depth_values[:, :1], depth_vals_mid, axis=-1)

        noise = F.rand(shape=depth_values.shape)
        depth_values = lower + (upper - lower) * noise

    sample_points = ray_origins[..., None, :] + \
        ray_directions[..., None, :]*depth_values[..., :, None]

    return sample_points, depth_values
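
A hedged usage sketch (NeRF-style stratified sampling along rays; all sizes are hypothetical):

import nnabla as nn

ray_origins = nn.Variable((1024, 3))
ray_directions = nn.Variable((1024, 3))
near_plane = nn.Variable((1024,))
far_plane = nn.Variable((1024,))
points, depths = compute_sample_points_for_variable_depth(
    ray_origins, ray_directions, near_plane, far_plane, num_samples=64,
    randomize=True)
print(points.shape)  # (1024, 64, 3): one 3D point per ray per depth step
print(depths.shape)  # (1024, 64)
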
Example #4
def build_model():
    x = nn.Variable((batch_size, sentence_length_source))
    mask = get_mask(x)
    y = nn.Variable((batch_size, sentence_length_target))

    enc_input = time_distributed(PF.embed)(
        x, vocab_size_source, embedding_size, name='enc_embeddings') * mask
    # -> (batch_size, sentence_length_source, embedding_size)

    dec_input = F.concatenate(F.constant(w2i_target['<bos>'],
                                         shape=(batch_size, 1)),
                              y[:, :sentence_length_target - 1],
                              axis=1)

    dec_input = time_distributed(PF.embed)(dec_input,
                                           vocab_size_target,
                                           embedding_size,
                                           name='dec_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)

    # encoder
    with nn.parameter_scope('encoder'):
        enc_output, c, h = lstm(enc_input,
                                hidden,
                                mask=mask,
                                return_sequences=True,
                                return_state=True)
        # -> (batch_size, sentence_length_source, hidden), (batch_size, hidden), (batch_size, hidden)

    # decoder
    with nn.parameter_scope('decoder'):
        dec_output = lstm(dec_input,
                          hidden,
                          initial_state=(c, h),
                          return_sequences=True)
        # -> (batch_size, sentence_length_target, hidden)

        attention_output = global_attention(dec_output,
                                            enc_output,
                                            mask=mask,
                                            score='dot')
        # -> (batch_size, sentence_length_target, hidden)

    output = F.concatenate(dec_output, attention_output, axis=2)

    output = time_distributed(PF.affine)(output,
                                         vocab_size_target,
                                         name='output')
    # -> (batch_size, sentence_length_target, vocab_size_target)

    t = F.reshape(y, (batch_size, sentence_length_target, 1))

    entropy = time_distributed_softmax_cross_entropy(output, t)

    mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
    count = F.sum(mask, axis=1)

    entropy *= mask
    loss = F.mean(F.sum(entropy, axis=1) / count)
    return x, y, loss
Example #5
def main():
    """
        Inference function to generate SR images.
    """
    nn.load_parameters(args.model)
    # Inference data loader
    inference_data = inference_data_loader(args.input_dir_lr)
    input_shape = [1] + list(inference_data.inputs[0].shape)
    output_shape = [1, input_shape[1] * 4, input_shape[2] * 4, 3]
    oh = input_shape[1] - input_shape[1] // 8 * 8  # H mod 8, used to pad the flow
    ow = input_shape[2] - input_shape[2] // 8 * 8  # W mod 8, used to pad the flow

    # Build the computation graph
    inputs_raw = nn.Variable(input_shape)
    pre_inputs = nn.Variable(input_shape)
    pre_gen = nn.Variable(output_shape)
    pre_warp = nn.Variable(output_shape)

    transposed_pre_warp = space_to_depth(pre_warp)
    inputs_all = F.concatenate(inputs_raw, transposed_pre_warp)
    with nn.parameter_scope("generator"):
        gen_output = generator(inputs_all, 3, args.num_resblock)
    outputs = (gen_output + 1) / 2
    inputs_frames = F.concatenate(pre_inputs, inputs_raw)
    with nn.parameter_scope("fnet"):
        flow_lr = flow_estimator(inputs_frames)
    flow_lr = F.pad(flow_lr, (0, 0, 0, oh, 0, ow, 0, 0), "reflect")
    flow_hr = upscale_four(flow_lr * 4.0)
    pre_gen_warp = warp_by_flow(pre_gen, flow_hr)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    max_iter = len(inference_data.inputs)
    print('Frame evaluation starts!!')
    pre_inputs.d, pre_gen.d, pre_warp.d = 0, 0, 0
    for i in range(max_iter):
        inputs_raw.d = np.array([inference_data.inputs[i]]).astype(np.float32)
        if i != 0:
            pre_gen_warp.forward()
            pre_warp.data.copy_from(pre_gen_warp.data)
        outputs.forward()
        output_frame = outputs.d

        if i >= 5:
            name, _ = os.path.splitext(
                os.path.basename(str(inference_data.paths_lr[i])))
            filename = args.output_name + '_' + name
            print('saving image %s' % filename)
            out_path = os.path.join(args.output_dir,
                                    "%s.%s" % (filename, args.output_ext))
            save_img(out_path, output_frame[0])
        else:  # the first 5 frames are hard-coded symmetric padding: processed for timing, but not saved
            print("Warming up %d" % (5 - i))

        pre_inputs.data.copy_from(inputs_raw.data)
        pre_gen.data.copy_from(outputs.data)
Example #6
 def call(self, x1, x2):
     y1 = self.conv_bn_1(x1)
     y2 = self.conv_bn_2(x2)
     y = F.concatenate(y1, y2, axis=1)
     # The ConvBn() instance is destroyed when this scope is left,
     # so the parameters owned by the `cb` object are released too.
     cb = ConvBn(1)
     y = F.concatenate(y, cb(x1), axis=1)
     return y
Example #7
def network_LSTM(x, D, C, InputShape, HiddenSize, test=False):
    # Input_2:x -> 687
    # Delay_in:D -> 100
    # Cell_in:C -> 100

    # Concatenate -> 787
    h = F.concatenate(D, x, axis=1)

    # Affine -> 100
    h1 = PF.affine(h, HiddenSize, name='Affine')

    # InputGate -> 100
    h2 = PF.affine(h, HiddenSize, name='InputGate')

    # OutputGate -> 100
    h3 = PF.affine(h, HiddenSize, name='OutputGate')

    # ForgetGate -> 100
    h4 = PF.affine(h, HiddenSize, name='ForgetGate')
    # Sigmoid
    h1 = F.sigmoid(h1)
    # Sigmoid_2
    h2 = F.sigmoid(h2)

    # Sigmoid_3
    h3 = F.sigmoid(h3)
    # Sigmoid_4
    h4 = F.sigmoid(h4)

    # Mul2 -> 100
    h1 = F.mul2(h1, h2)

    # Mul2_3 -> 100
    h4 = F.mul2(h4, C)

    # Add2 -> 100
    h1 = F.add2(h1, h4, True)

    # Tanh
    h5 = F.tanh(h1)

    # Cell_out
    h6 = F.identity(h1)

    # Mul2_2 -> 100
    h5 = F.mul2(h5, h3)
    # Dropout
    if not test:
        h5 = F.dropout(h5)

    # Output
    h5 = F.identity(h5)

    # Concatenate_2 -> 200
    h5 = F.concatenate(h5, h6, axis=1)
    return h5
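
A single-step usage sketch, with sizes taken from the comments above (687-dim input, 100-dim hidden and cell state); the batch size is hypothetical:

import nnabla as nn

x = nn.Variable((64, 687))
D = nn.Variable((64, 100))  # delayed (previous) hidden output
C = nn.Variable((64, 100))  # previous cell state
h = network_LSTM(x, D, C, 687, 100, test=True)
print(h.shape)  # (64, 200): [output, cell_out] concatenated
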
Example #8
def sdr_loss(mix, pred, gt_time):
    # SDR-Combination Loss
    # mix     -> (BatchSize(16), 2(1 source x 2 channels), TimeLen)  -> (B, C, T)
    # pred    -> (4(sources), Bsize, 2(channels), Len)               -> (S, B, C, T)
    # gt_time -> (BatchSize(16), 8(4 source x 2 channels), TimeLen)  -> (B, S*C, T)
    #                       channel-dim -> [bass1, bass2, drums1, drums2, ...]

    _, batch_size, n_channels, length = pred.shape

    # Fix Length
    mix = mix[Ellipsis, :length]
    gt_time = gt_time[Ellipsis, :length]

    # Fix Shape
    mix = unsqueeze(mix)  # [1, B, C, T]
    gt_time = unsqueeze(gt_time)  # [1, B, S*C, T]
    data_t = mix  # [1, B, C, T]

    for i in range(4):
        data_t = F.concatenate(data_t, gt_time[Ellipsis, 2*i:2*i+2, :], axis=0)

    data_t = F.reshape(data_t, (-1, length))  # [5*B*C, T]
    pred = F.reshape(pred, (batch_size*n_channels *
                            pred.shape[0], pred.shape[-1]))  # [B*C*S, T]

    # Combination List (4C2 + 4C3)
    combi_list = [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
                  (1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)]
    bc = batch_size * n_channels  # rows per (source, batch, channel) block
    for combi in combi_list:
        if len(combi) == 2:
            tmp_data = data_t[bc*combi[0]:bc*(combi[0]+1), Ellipsis] \
                + data_t[bc*combi[1]:bc*(combi[1]+1), Ellipsis]
            tmp_pred = pred[bc*(combi[0]-1):bc*combi[0], Ellipsis] \
                + pred[bc*(combi[1]-1):bc*combi[1], Ellipsis]
        else:
            tmp_data = data_t[bc*combi[0]:bc*(combi[0]+1), Ellipsis] \
                + data_t[bc*combi[1]:bc*(combi[1]+1), Ellipsis] \
                + data_t[bc*combi[2]:bc*(combi[2]+1), Ellipsis]
            tmp_pred = pred[bc*(combi[0]-1):bc*combi[0], Ellipsis] \
                + pred[bc*(combi[1]-1):bc*combi[1], Ellipsis] \
                + pred[bc*(combi[2]-1):bc*combi[2], Ellipsis]

        data_t = F.concatenate(data_t, tmp_data, axis=0)
        pred = F.concatenate(pred, tmp_pred, axis=0)

    # All 14 Combinations (4C1 + 4C2 + 4C3)
    mix_t = F.tile(data_t[:bc, Ellipsis], (14, 1))
    data_t = data_t[bc:, Ellipsis]

    # SDR Loss Calculation
    loss_sdr = sdr_loss_core(pred, data_t, mix_t, weighted=True)

    return 1.0 + loss_sdr
Example #9
def bicubic_four(inputs, scope='bicubic_four'):
    """
    Equivalent to tf.image.resize_bicubic( inputs, (h*4, w*4) ) for a fix ratio of 4 FOR API <=1.13
    For API 2.0, tf.image.resize_bicubic will be different, old version is tf.compat.v1.image.resize_bicubic
    **Parallel Catmull-Rom Spline Interpolation Algorithm for Image Zooming Based on CUDA*[Wu et. al.]**
    """
    with nn.parameter_scope(scope):
        b, h, w, c = inputs.shape

        p_inputs = F.concatenate(inputs[:, :1, :, :], inputs,
                                 axis=1)  # pad top
        p_inputs = F.concatenate(p_inputs[:, :, :1, :], p_inputs,
                                 axis=2)  # pad left
        p_inputs = F.concatenate(p_inputs,
                                 p_inputs[:, -1:, :, :],
                                 p_inputs[:, -1:, :, :],
                                 axis=1)  # pad bottom
        p_inputs = F.concatenate(p_inputs,
                                 p_inputs[:, :, -1:, :],
                                 p_inputs[:, :, -1:, :],
                                 axis=2)  # pad right

        hi_res_bin = [p_inputs[:, bi:bi + h, :, :] for bi in range(4)]
        r = 0.75
        mat = np.float32([[0, 1, 0, 0], [-r, 0, r, 0],
                          [2 * r, r - 3, 3 - 2 * r, -r], [-r, 2 - r, r - 2,
                                                          r]])
        weights = [
            np.float32([1.0, t, t * t, t * t * t]).dot(mat)
            for t in [0.0, 0.25, 0.5, 0.75]
        ]

        hi_res_array = []  # [hi_res_bin[1]]
        for hi in range(4):
            cur_wei = weights[hi]
            cur_data = cur_wei[0] * hi_res_bin[0] + cur_wei[1] * hi_res_bin[1] + \
                cur_wei[2] * hi_res_bin[2] + cur_wei[3] * hi_res_bin[3]
            hi_res_array.append(cur_data)
        hi_res_y = F.stack(*hi_res_array, axis=2)  # shape (b,h,4,w,c)
        hi_res_y = F.reshape(hi_res_y, (b, h * 4, w + 3, c))
        hi_res_bin = [hi_res_y[:, :, bj:bj + w, :] for bj in range(4)]

        hi_res_array = []  # [hi_res_bin[1]]
        for hj in range(4):
            cur_wei = weights[hj]
            cur_data = cur_wei[0] * hi_res_bin[0] + cur_wei[1] * hi_res_bin[1] + \
                cur_wei[2] * hi_res_bin[2] + cur_wei[3] * hi_res_bin[3]
            hi_res_array.append(cur_data)
        hi_res = F.stack(*hi_res_array, axis=3)  # shape (b,h*4,w,4,c)
        hi_res = F.reshape(hi_res, (b, h * 4, w * 4, c))

    return hi_res
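
A minimal shape sketch (NHWC layout, as assumed by the function body; sizes hypothetical):

import nnabla as nn

lr = nn.Variable((1, 32, 48, 3))  # (B, H, W, C)
hr = bicubic_four(lr)
print(hr.shape)  # (1, 128, 192, 3): fixed 4x upscaling in both spatial dims
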
Example #10
        def conv_bn_relu(h, i, name, skip=True):
            s = h
            imaps = h.shape[1]
            with nn.parameter_scope(name):
                h = PF.convolution(h, imaps, (3, 3), pad=(1, 1))
                h = PF.batch_normalization(h)
                h = F.relu(h)
            if not skip:
                return F.concatenate(*[h, s], axis=1) if i % 2 == 0 else h + s

            h = F.split(h, axis=1)
            h = [h_.reshape(h_.shape[:1] + (1, ) + h_.shape[1:]) for h_ in h]
            h = F.concatenate(*h, axis=1)
            return h
Example #11
File: ops.py Project: sony/nnabla-examples
def dyn_sep_up_operation(x, dr_k_v, dr_k_h, k_sz, sf):
    """
    Dynamic separable upsampling operation with 1D separable local kernels.
    x: [B, H, W, C], dr_k_v: [B, H, W, 41*sf*sf], dr_k_h: [B, H, W, 41*sf*sf]
    out: [B, H*sf, W*sf, C]
    """
    sz = x.shape
    pad = k_sz // 2  # local filter pad size
    # [B, H, W, C*sf*sf]
    out_v = nn.Variable((sz[0], sz[1], sz[2], sz[3] * sf**2))
    out_v.data.zero()
    # [B, H, W, C*sf*sf]
    out_h = nn.Variable((sz[0], sz[1], sz[2], sz[3] * sf**2))
    out_h.data.zero()
    img_pad = F.pad(x, (0, 0, pad, pad, 0, 0, 0, 0))
    img_pad_y = F.reshape(
        img_pad[:, :, :,
                0], (img_pad.shape[0], img_pad.shape[1], img_pad.shape[2], 1))
    img_pad_y = F.tile(img_pad_y, [1, 1, 1, sf**2])
    img_pad_u = F.reshape(
        img_pad[:, :, :,
                1], (img_pad.shape[0], img_pad.shape[1], img_pad.shape[2], 1))
    img_pad_u = F.tile(img_pad_u, [1, 1, 1, sf**2])
    img_pad_v = F.reshape(
        img_pad[:, :, :,
                2], (img_pad.shape[0], img_pad.shape[1], img_pad.shape[2], 1))
    img_pad_v = F.tile(img_pad_v, [1, 1, 1, sf**2])
    img_pad = F.concatenate(img_pad_y, img_pad_u, img_pad_v, axis=3)

    # vertical 1D filter
    for i in range(k_sz):
        out_v = out_v + img_pad[:, i:i + sz[1], :, :] * F.tile(
            dr_k_v[:, :, :, i:k_sz * sf**2:k_sz], [1, 1, 1, 3])
    img_pad = F.pad(out_v, (0, 0, 0, 0, pad, pad, 0, 0))
    # horizontal 1D filter
    for i in range(k_sz):
        out_h = out_h + img_pad[:, :, i:i + sz[2], :] * F.tile(
            dr_k_h[:, :, :, i:k_sz * sf**2:k_sz], [1, 1, 1, 3])

    # depth to space upsampling (YUV)
    out = depth_to_space(out_h[:, :, :, 0:sf**2], sf)
    out = F.concatenate(out,
                        depth_to_space(out_h[:, :, :, sf**2:2 * sf**2], sf),
                        axis=3)
    out = F.concatenate(out,
                        depth_to_space(out_h[:, :, :, 2 * sf**2:3 * sf**2],
                                       sf),
                        axis=3)
    return out
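
A hedged shape sketch (sizes hypothetical; k_sz=41 matches the kernel size mentioned in the original docstring, and depth_to_space is the module's own helper): each output pixel is filtered by a per-pixel vertical then horizontal 1D kernel before depth-to-space upsampling by sf.

import nnabla as nn

sf, k_sz = 4, 41
x = nn.Variable((1, 24, 24, 3))                  # YUV input
dr_k_v = nn.Variable((1, 24, 24, k_sz * sf**2))  # per-pixel vertical kernels
dr_k_h = nn.Variable((1, 24, 24, k_sz * sf**2))  # per-pixel horizontal kernels
y = dyn_sep_up_operation(x, dr_k_v, dr_k_h, k_sz, sf)
print(y.shape)  # (1, 96, 96, 3)
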
Example #12
 def __call__(self, x, z=None):
     b, c = x.shape[0:2]
     h = x
     h = self.affine_act(h, self.dims, name="fc0")
     h = self.affine_act(h, self.dims, name="fc1")
     h = self.affine_act(h, self.dims, name="fc2")
     h = self.affine_act(h, self.dims - (self.ldims + c), name="fc3")
     h = F.concatenate(*[x, z, h],
                       axis=1) if z is not None else F.concatenate(*[x, h],
                                                                   axis=1)
     h = self.affine_act(h, self.dims, name="fc4")
     h = self.affine_act(h, self.dims, name="fc5")
     h = self.affine_act(h, self.dims, name="fc6")
     y = self.last_affine(h, 1, name="fc7")
     return y
Example #13
def get_t_d(conf, r_inputs, d_data):
    """
    Create real and fake temporal discriminators
    """
    # crop out the unstable border region for the temporal discriminator
    # (details in the TecoGAN supplemental paper)
    crop_size_dt = int(conf.train.crop_size * 4 * conf.gan.crop_dt)
    offset_dt = (conf.train.crop_size * 4 - crop_size_dt) // 2
    crop_size_dt = conf.train.crop_size * 4 - offset_dt * 2
    paddings = (0, 0, offset_dt, offset_dt, offset_dt, offset_dt, 0, 0)

    with nn.parameter_scope("discriminator"):
        real_warp = warp_by_flow(d_data.t_targets, d_data.t_vel)
        real_warp = space_to_depth_disc(real_warp, d_data.t_batch)

        # equivalent to tf.image.crop_to_bounding_box
        real_warp = real_warp[:, offset_dt:offset_dt + crop_size_dt,
                              offset_dt:offset_dt + crop_size_dt, :]
        real_warp = F.pad(real_warp, paddings)
        before_warp = space_to_depth_disc(d_data.t_targets, d_data.t_batch)
        t_input = space_to_depth_disc(r_inputs[:, :d_data.t_size, :, :, :],
                                      d_data.t_batch)
        # resizing using bilinear interpolation
        input_hi = F.interpolate(t_input,
                                 scale=(4, 4),
                                 mode='linear',
                                 channel_last=True)
        real_warp = F.concatenate(before_warp, real_warp, input_hi)

        tdiscrim_real_output, real_layers = discriminator(real_warp)

        fake_warp = warp_by_flow(d_data.t_gen_output, d_data.t_vel)
        fake_warp = space_to_depth_disc(fake_warp, d_data.t_batch)
        fake_warp = fake_warp[:, offset_dt:offset_dt + crop_size_dt,
                              offset_dt:offset_dt + crop_size_dt, :]
        fake_warp = F.pad(fake_warp, paddings)
        before_warp = space_to_depth_disc(d_data.t_gen_output,
                                          d_data.t_batch,
                                          inplace=False)
        fake_warp = F.concatenate(before_warp, fake_warp, input_hi)
        tdiscrim_fake_output, fake_layers = discriminator(fake_warp)

    temporal_disc = collections.namedtuple(
        'temporal_disc', 'tdiscrim_real_output,'
        'real_layers, tdiscrim_fake_output, fake_layers')
    return temporal_disc(tdiscrim_real_output=tdiscrim_real_output,
                         real_layers=real_layers,
                         tdiscrim_fake_output=tdiscrim_fake_output,
                         fake_layers=fake_layers)
Example #14
def decoder(x: list,
            block_expansion: int,
            num_blocks=3,
            max_features=256,
            test=False,
            comm=None):
    up_blocks = []

    for i in range(num_blocks)[::-1]:
        up_block = functools.partial(upblock,
                                     out_features=min(max_features,
                                                      block_expansion *
                                                      (2**i)),
                                     kernel_size=3,
                                     padding=1,
                                     test=test,
                                     comm=comm)
        up_blocks.append(up_block)

    out = x.pop()  # Variable((B, 256, 32, 32)), the last feature from encoder

    for i, up_block in enumerate(up_blocks):
        with nn.parameter_scope(f"upblock_{i}"):
            out = up_block(out)
        skip = x.pop()
        out = F.concatenate(out, skip, axis=1)

    return out
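
A usage sketch under stated assumptions: upblock (defined elsewhere in the module) is assumed to double the spatial size, and the feature list holds one tensor per encoder stage, shallowest first, so the deepest feature is popped first. All sizes are hypothetical.

import nnabla as nn

features = [nn.Variable((4, 32, 256, 256)),
            nn.Variable((4, 64, 128, 128)),
            nn.Variable((4, 128, 64, 64)),
            nn.Variable((4, 256, 32, 32))]  # deepest feature is consumed first
out = decoder(features, block_expansion=64, num_blocks=3)
# each up_block output is concatenated with the matching skip connection
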
Example #15
 def __call__(self, features):
     upsampled_inputs = [
         F.interpolate(x,
                       output_size=features[0].shape[2:],
                       mode='linear',
                       align_corners=False,
                       half_pixel=True) for x in features
     ]
     inputs = F.concatenate(*upsampled_inputs, axis=1)
     out = self.conv2d(inputs,
                       self.hparams['channels'],
                       kernel_size=1,
                       stride=1,
                       bias=False,
                       name='convs/0/conv')
     out = F.relu(self.batch_norm(out, name='convs/0/bn'))
     out = self.conv2d(out,
                       self.hparams['num_classes'],
                       kernel_size=1,
                       stride=1,
                       bias=True,
                       name='conv_seg')
     out = F.interpolate(out,
                         output_size=self.output_size,
                         mode='linear',
                         align_corners=False,
                         half_pixel=True)
     if self.test:
         return F.softmax(out, axis=1)
     return out
Example #16
def shortcut(x, ochannels, stride, shortcut_type, test, channel_last=False):
    axes = [3 if channel_last else 1]
    ichannels = x.shape[axes[0]]
    use_conv = shortcut_type.lower() == 'c'
    if ichannels != ochannels:
        assert (ichannels * 2 == ochannels) or (ichannels * 4 == ochannels)
        if shortcut_type.lower() == 'b':
            use_conv = True
    if use_conv:
        # Convolution does everything.
        # Matching channels, striding.
        with nn.parameter_scope("shortcut_conv"):
            x = pf_convolution(x,
                               ochannels, (1, 1),
                               stride=stride,
                               channel_last=channel_last)
            x = PF.batch_normalization(x, axes=axes, batch_stat=not test)
    else:
        if stride != (1, 1):
            # Stride
            x = F.average_pooling(x, (1, 1), stride, channel_last=channel_last)
        if ichannels != ochannels:
            # Zero-padding to channel axis
            ishape = x.shape
            if channel_last:
                zero_shape = (ishape[0],) + ishape[1:3] + \
                              (ochannels - ichannels,)
            else:
                zero_shape = (ishape[0], ochannels - ichannels) + ishape[-2:]
            zeros = F.constant(0, zero_shape)
            x = F.concatenate(x, zeros, axis=axes[0])
    return x
Example #17
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        axis = self.forward_func.info.args["axis"]

        # Inputs
        x0 = inputs[0].data
        dy = inputs[1].data
        # Outputs
        dx0 = outputs[0].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_dy = inputs[1].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # Computation
        if prop_down[1]:
            maskp = F.greater_equal_scalar(x0, 0.0)
            maskn = maskp - 1.0
            g_dy_p = maskp * g_dx0
            g_dy_n = maskn * g_dx0
            g_dy_ = F.concatenate(*[g_dy_p, g_dy_n], axis=axis)
            if accum[1]:
                g_dy.copy_from(g_dy + g_dy_)
            else:
                g_dy.copy_from(g_dy_)
Example #18
def simple_rnn(inputs, units, return_sequences=False, fix_parameters=False):
    '''
    A vanilla recurrent neural network layer.
    Args:
        inputs (nnabla.Variable): A shape of [B, SentenceLength, EmbeddingSize].
        units (int): Dimensionality of the output space.
        return_sequences (bool): Whether to return the full output sequence, or only the last output.
        fix_parameters (bool): Fix parameters (set need_grad=False).
    Returns:
        nn.Variable: A shape of [B, SentenceLength, units] if return_sequences is True,
        otherwise [B, units].
    '''

    hs = []
    batch_size = inputs.shape[0]
    sentence_length = inputs.shape[1]
    h0 = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))

    inputs = F.split(inputs, axis=1)  # split along the sequence (time) axis

    h = h0
    for x in inputs:
        h = F.tanh(PF.affine(F.concatenate(x, h, axis=1), units, fix_parameters=fix_parameters))
        hs.append(h)

    if return_sequences:
        hs = F.stack(*hs, axis=1)
        return hs
    else:
        return hs[-1]
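
A short usage sketch (hypothetical sizes):

import nnabla as nn

x = nn.Variable((8, 20, 100))  # (B, SentenceLength, EmbeddingSize)
seq = simple_rnn(x, units=64, return_sequences=True)  # (8, 20, 64)
last = simple_rnn(x, units=64)                        # (8, 64)
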
Example #19
def sin_cos_positional_embedding(x,
                                 num_encoding_functions,
                                 include_input=True,
                                 log_sampling=True):
    """Given coordinate positions of sampling points as a (N,3) array, this functions returns embeds each point with the sine and cosine function

    Args:
        x (nn.Variable or nn.NdArray): Shape is (N, 3). 
        num_encoding_functions (int): number of frequencies to encode for each grid position
        include_input (bool, optional): Whether include the original grid position along with the encoding of the position. Defaults to True.
        log_sampling (bool, optional): Sample logarithmically and not linearly. Defaults to True.

    Returns:
        [nn.Variable or nn.NdArray]: (N, num_encoding_functions*3*2+3) if include_input is True else (N, num_encoding_functions*3*2)
    """

    encoding = [x] if include_input else []

    if log_sampling:
        frequency_increments = F.arange(0, num_encoding_functions)
        frequency_bands = F.pow2(
            F.constant(2, shape=frequency_increments.shape),
            frequency_increments)
    else:
        frequency_bands = F.arange(2**0,
                                   2**(num_encoding_functions - 1) + 1e-5,
                                   (2**(num_encoding_functions - 1) - 1) /
                                   (num_encoding_functions - 1.0))

    for freq in frequency_bands:
        for func in [F.sin, F.cos]:
            encoding.append(func(x * F.reshape(freq, (1, 1))))
    return F.concatenate(*encoding, axis=x.ndim - 1)
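
A quick shape check (hypothetical sizes): each of the 3 coordinates maps to 2*num_encoding_functions values (a sine and a cosine per frequency), plus the 3 raw coordinates when include_input=True.

import nnabla as nn

x = nn.Variable((1024, 3))
emb = sin_cos_positional_embedding(x, num_encoding_functions=6)
print(emb.shape)  # (1024, 39) = 3 + 3*2*6
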
Example #20
    def shuffle_unit(x, scope_name, dn=False):
        """
        Figure. 2 (b) and (c) in https://arxiv.org/pdf/1707.01083.pdf
        """

        C = x.shape[1]
        h = x
        with nn.parameter_scope(scope_name):
            with nn.parameter_scope("gconv1"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   group=groups,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)

            with nn.parameter_scope("shuffle"):  # no meaning but semantics
                h = shuffle(h)

            with nn.parameter_scope("dconv"):
                stride = (2, 2) if dn else (1, 1)
                h = PF.depthwise_convolution(h, kernel=(3, 3), pad=(1, 1),
                                             stride=stride,
                                             with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)

            with nn.parameter_scope("gconv2"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   group=groups,
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)

            s = F.average_pooling(x, (2, 2)) if dn else x
            h = F.concatenate(*[h, s], axis=1) if dn else h + s
            h = F.relu(h)
        return h
Example #21
def conv_lstm_cell(input_tensor, cur_state, n_filt, kernel_size):
    """
    conv lstm cell definition
    """

    def split(inp):
        _, channels, _, _ = inp.shape
        channels = channels // 4  # integer division: the conv output packs four gates
        return inp[:, :channels, :, :], inp[:, channels:2 * channels, :, :], \
            inp[:, 2 * channels:3 * channels, :, :], \
            inp[:, 3 * channels:4 * channels, :, :]

    h_cur, c_cur = cur_state
    # concatenate along channel axis
    combined = F.concatenate(*[input_tensor, h_cur], axis=1)
    combined_conv = conv2d(combined, 4 * n_filt, kernel_size, 1, kernel_size // 2,
                           name='conv_lstm_cell')
    cc_i, cc_f, cc_o, cc_g = split(combined_conv)
    act_i = F.sigmoid(cc_i)
    act_f = F.sigmoid(cc_f)
    act_o = F.sigmoid(cc_o)
    act_g = F.tanh(cc_g)
    c_next = F.add2(act_f * c_cur, act_i * act_g)
    h_next = act_o * F.tanh(c_next)
    return h_next, c_next
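
A usage sketch (hypothetical sizes; conv2d is the snippet's own convolution helper):

import nnabla as nn

x = nn.Variable((2, 16, 32, 32))   # input feature map
h = nn.Variable((2, 32, 32, 32))   # previous hidden state
c = nn.Variable((2, 32, 32, 32))   # previous cell state
h_next, c_next = conv_lstm_cell(x, (h, c), n_filt=32, kernel_size=3)
print(h_next.shape, c_next.shape)  # (2, 32, 32, 32) each
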
Example #22
File: rnn.py Project: Pandinosaurus/nnabla
def _gru(x, h, w, b, with_bias):
    """GRU cell.
    Args:
        x (:obj:`~nnabla.Variable`): Input data.
        h (:obj:`~nnabla.Variable`): Hidden state.
        w (:obj:`~nnabla.Variable`): Weight.
        b (:obj:`~nnabla.Variable`): Bias.
        with_bias (bool): Include the bias or not.
    """
    hidden_size = h.shape[1]
    xh = F.concatenate(*(x, h), axis=1)
    w0, w1, w2 = F.split(w, axis=0)
    b0 = b1 = b2 = b3 = None
    if with_bias:
        b0, b1, b2, b3 = F.split(b, axis=0)
    r_t = F.sigmoid(F.affine(xh, F.transpose(w0, (1, 0)), b0))
    z_t = F.sigmoid(F.affine(xh, F.transpose(w1, (1, 0)), b1))

    w2_0 = w2[:, :w2.shape[1] - hidden_size]
    w2_1 = w2[:, w2.shape[1] - hidden_size:]
    n_t = F.tanh(
        F.affine(x, F.transpose(w2_0, (1, 0)), b2) +
        r_t * F.affine(h, F.transpose(w2_1, (1, 0)), b3))
    h_t = (1 - z_t) * n_t + z_t * h

    return h_t
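
A shape sketch for the cell above (hypothetical sizes): w stacks the reset, update, and candidate weights along axis 0, and b holds four bias rows, matching the splits in the function body.

import nnabla as nn

batch, input_size, hidden_size = 8, 16, 32
x = nn.Variable((batch, input_size))
h = nn.Variable((batch, hidden_size))
w = nn.Variable((3, hidden_size, input_size + hidden_size))
b = nn.Variable((4, hidden_size))
h_t = _gru(x, h, w, b, with_bias=True)
print(h_t.shape)  # (8, 32)
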
Example #23
def yolov2_activate(x, anchors, biases):
    shape = x.shape
    y = F.reshape(x, (shape[0], anchors, -1) + shape[2:])
    stop = list(y.shape)
    stop[2] = 2
    t_xy = F.slice(y, (0, 0, 0, 0, 0), stop)
    stop[2] = 4
    t_wh = F.slice(y, (0, 0, 2, 0, 0), stop)
    stop[2] = 5
    t_o = F.slice(y, (0, 0, 4, 0, 0), stop)
    stop[2] = y.shape[2]
    t_p = F.slice(y, (0, 0, 5, 0, 0), stop)
    t_xy = F.sigmoid(t_xy)
    t_wh = F.exp(t_wh)
    t_o = F.sigmoid(t_o)
    t_p = F.softmax(t_p, axis=2)
    t_x, t_y, t_wh = yolov2_image_coordinate(t_xy, t_wh, biases)
    y = F.concatenate(t_x, t_y, t_wh, t_o, t_p, axis=2)
    y = F.transpose(y, (0, 1, 3, 4, 2)).reshape(
        (shape[0], -1, shape[1] // anchors))
    return y
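
A shape sketch under assumptions: 5 anchors and 20 classes give 5*(4+1+20)=125 input channels, and biases holds the anchor priors consumed by the yolov2_image_coordinate helper (its exact format is defined elsewhere; the values below are hypothetical).

import nnabla as nn
import numpy as np

x = nn.Variable((2, 125, 13, 13))
biases = np.array([[1.08, 1.19], [3.42, 4.41], [6.63, 11.38],
                   [9.42, 5.11], [16.62, 10.52]])  # hypothetical anchor priors
y = yolov2_activate(x, anchors=5, biases=biases)
print(y.shape)  # (2, 845, 25): (B, anchors*grid_h*grid_w, 4+1+classes)
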
Example #24
def lighting_network(x_hat,
                     normal,
                     feature,
                     view,
                     D=512,
                     L=4,
                     N=4,
                     including_input=True):
    """
    Args
      x_hat: Differentiable intersection point (B, R, 3)
      normal: Normal on x_hat (B, R, 3) (should be normalized before).
      feature: Intermediate output of the implicit network (B, R, feature_size).
      view: View direction (B, R, 3)
      D: Dimension of a network.
      L: Number of layers.
      N: Number of frequency of the positional encoding.
      including_input: Include input in the positional encoding (PE).
    """
    pe_view = positional_encoding(view, N, including_input)
    h = F.concatenate(*[x_hat, normal, feature, pe_view], axis=-1)
    for l in range(L - 1):
        h = affine(h, D, name=f"affine-{l:02d}")
        h = F.relu(h)
    h = affine(h, 3, name=f"affine-{L - 1:02d}")
    h = F.tanh(h)
    return h
Example #25
def shortcut(x, ochannels, stride, shortcut_type, test):
    ichannels = x.shape[1]
    use_conv = shortcut_type.lower() == 'c'
    if ichannels != ochannels:
        assert (ichannels * 2 == ochannels) or (ichannels * 4 == ochannels)
        if shortcut_type.lower() == 'b':
            use_conv = True
    if use_conv:
        # Convolution does everything.
        # Matching channels, striding.
        with nn.parameter_scope("shortcut_conv"):
            x = PF.convolution(x,
                               ochannels, (1, 1),
                               stride=stride,
                               with_bias=False)
            x = PF.batch_normalization(x, batch_stat=not test)
    else:
        if stride != (1, 1):
            # Stride
            x = F.average_pooling(x, (1, 1), stride)
        if ichannels != ochannels:
            # Zero-padding to channel axis
            ishape = x.shape
            zeros = F.constant(0, (ishape[0], ochannels - ichannels) +
                               ishape[-2:])
            x = F.concatenate(x, zeros, axis=1)
    return x
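
A sketch of the zero-padding branch (shortcut type 'a'; sizes hypothetical): the input is pooled with stride 2, then zero channels are appended to match ochannels.

import nnabla as nn

x = nn.Variable((4, 64, 32, 32))
y = shortcut(x, ochannels=128, stride=(2, 2), shortcut_type='a', test=True)
print(y.shape)  # (4, 128, 16, 16)
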
Example #26
def factorized_reduction(x, output_filter, scope, test):
    """
        Applying spatial reduction to input variable.
        Input variable is passed to:
        Skip path 1, applied average pooling with stride 2.
        Skip path 2, first padded with 0 on the right and bottom, 
                     then shifted by 1 (so that those 0-padded sides will be added, 
                     whereas its shape is the same as the original),
        Then these 2 variables are concatenated along the depth dimension.
    """
    with nn.parameter_scope(scope):
        path1 = F.average_pooling(x, (1, 1), (2, 2))
        with nn.parameter_scope("path1_conv"):
            path1 = PF.convolution(
                path1, output_filter // 2, (1, 1), with_bias=False)

        path2 = F.pad(x, (0, 1, 0, 1), mode='constant')
        path2 = F.slice(path2, (0, 0, 1, 1))
        path2 = F.average_pooling(path2, (1, 1), (2, 2))
        with nn.parameter_scope("path2_conv"):
            path2 = PF.convolution(
                path2, output_filter // 2, (1, 1), with_bias=False)

        final_path = F.concatenate(path1, path2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            final_path = PF.batch_normalization(
                final_path, batch_stat=not test)

    return final_path
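
A shape sketch (hypothetical sizes): the spatial dimensions halve and the channel count becomes output_filter.

import nnabla as nn

x = nn.Variable((8, 32, 16, 16))
y = factorized_reduction(x, 64, "reduction0", test=False)
print(y.shape)  # (8, 64, 8, 8)
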
Example #27
def yolov2_feature(c13, c18, test=False, feature_dict=None):
    '''
    Extract and merge the multi-scale YOLO v2 features from the c13 and c18 stages.
    '''
    if feature_dict is None:
        feature_dict = {}
    # Extra feature extraction for c18
    h = conv_bn_pool(c18, 1024, 3, pool=False, test=test, name='c18_19')
    feature_dict['c18_19'] = h
    h = conv_bn_pool(h, 1024, 3, pool=False, test=test, name='c18_20')
    feature_dict['c18_20'] = h

    # Extra feature extraction for c13
    c13_h = conv_bn_pool(c13, 64, 1, pool=False, test=test, name='c13_14')
    feature_dict['c13_14'] = c13_h
    c13_h = reorg_darknet_bug(c13_h, 2)
    feature_dict['reorg'] = c13_h

    # Concatenate c13 and c18 features together
    h = F.concatenate(c13_h, h, axis=1)
    feature_dict['route'] = h

    # Extra feature extraction of the multi-scale features
    h = conv_bn_pool(h, 1024, 3, pool=False, test=test, name='c21')
    feature_dict['c21'] = h
    return h
Example #28
def frame_colorization(IA_lab,
                       IB_lab,
                       IA_last_lab,
                       features_B,
                       joint_training=True,
                       feature_noise=0,
                       luminance_noise=0,
                       temperature=0.01):
    # take the luminance (L) channel of the Lab input for feature extraction
    IA_l = IA_lab[:, 0:1, :, :]

    nonlocal_BA_lab, similarity_map, features_A_gray = warp_color(
        IA_l, IB_lab, features_B, feature_noise, temperature=temperature)
    nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :]
    color_input = F.concatenate(
        IA_l,
        nonlocal_BA_ab,
        similarity_map,
        IA_last_lab,
        axis=1)
    with nn.parameter_scope('colornet'):
        IA_ab_predict = colorvidnet(color_input)

    return IA_ab_predict, nonlocal_BA_lab, features_A_gray
Example #29
    def d3_block(self, inp, growth_rate, num_layers, n_blocks):
        '''
        Define D3Block
        '''
        out = self.dilated_dense_block_2(inp,
                                         growth_rate * n_blocks,
                                         num_layers,
                                         scope_name='initial_block')
        if n_blocks > 1:
            lst = []
            for i in range(n_blocks):
                lst.append(out[:, i * growth_rate:(i + 1) * growth_rate])

            def update(inp_, n):
                for j in range(n_blocks - n - 1):
                    lst[j + 1 + n] += inp_[:, j * growth_rate:(j + 1) *
                                           growth_rate]

            for i in range(n_blocks - 1):
                tmp = self.dilated_dense_block_2(
                    lst[i],
                    growth_rate * (n_blocks - i - 1),
                    num_layers,
                    scope_name='layers/layer%s' % (i + 1))
                update(tmp, i)
            out = F.concatenate(*lst, axis=1)
        return out[:, -growth_rate:]
Example #30
def cnn(batch_size, vocab_size, text_len, classes, features=128, train=True):
    text = nn.Variable([batch_size, text_len])

    with nn.parameter_scope("text_embed"):
        embed = PF.embed(text, n_inputs=vocab_size, n_features=features)
    print("embed", embed.shape)

    embed = F.reshape(embed, (batch_size, 1, text_len, features))
    print("embed", embed.shape)

    combined = None
    for n in range(2, 6):  # 2- to 5-gram branches
        with nn.parameter_scope(str(n) + "_gram"):
            with nn.parameter_scope("conv"):
                conv = PF.convolution(embed, 128, kernel=(n, features))
                conv = F.relu(conv)
            with nn.parameter_scope("pool"):
                pool = F.max_pooling(conv, kernel=(conv.shape[2], 1))
                if combined is None:
                    combined = F.identity(pool)
                else:
                    combined = F.concatenate(combined, pool)

    if train:
        combined = F.dropout(combined, 0.5)

    with nn.parameter_scope("output"):
        y = PF.affine(combined, classes)

    t = nn.Variable([batch_size, 1])

    _loss = F.softmax_cross_entropy(y, t)
    loss = F.reduce_mean(_loss)

    return text, y, loss, t
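
A usage sketch (hypothetical sizes) wiring the n-gram text CNN into a training setup:

import nnabla as nn

text, y, loss, t = cnn(batch_size=32, vocab_size=20000, text_len=50,
                       classes=5, train=True)
print(y.shape)  # (32, 5)
# feed text.d with integer word ids and t.d with labels, then loss.forward()
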