コード例 #1
0
ファイル: cnn_model_026.py プロジェクト: kzky/works
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var):
    """Squared-error consistency loss between two predictions, scaled by
    a log-variance term.

    The loss is ``mean(squared_error(softmax(pred0), softmax(pred1))
    * exp(-log_var)) + mean(log_var)``, built inside the nnabla context
    ``ctx``.
    """
    # TODO: squared error/absolute error
    with nn.context_scope(ctx):
        prob0 = F.softmax(pred0)
        prob1 = F.softmax(pred1)
        # exp(-log_var) down-weights the error where log_var is large.
        weighted_error = F.squared_error(prob0, prob1) * F.exp(-log_var)
        data_term = F.mean(weighted_error)
        penalty_term = F.mean(log_var)
        loss_sr = data_term + penalty_term
    return loss_sr
コード例 #2
0
def pred(decoder_hidden_states, ctx_vectors, query_embed, query_embed_mask,
         rule_num, token_num, embedding_size, hidden_size):
    """Build the four output heads of the decoder.

    decoder_hidden_states: (batch_size, max_action_length, decoder_state_size)
    ctx_vectors: (batch_size, max_action_length, encoder_state_size)

    Returns the tuple
    ``(rule_predict, terminal_gen_action_prob, token_predict, copy_prob)``.
    """
    batch_size, max_action_length, _ = decoder_hidden_states.shape
    # Decoder state concatenated with the context vector along the last axis.
    state_and_ctx = concatenate(decoder_hidden_states, ctx_vectors, axis=2)

    def _biased_softmax(logits, vocab_size):
        # Adds the "embed/b" bias of the *current* parameter scope to the
        # logits and normalises. The bias data is zeroed every time this
        # runs, exactly as the inline code did.
        bias = nn.parameter.get_parameter_or_create("embed/b",
                                                    (vocab_size, ),
                                                    need_grad=True)
        bias.data.zero()
        bias = F.reshape(bias, (1, 1, vocab_size), inplace=False)
        bias = F.broadcast(bias,
                           (batch_size, max_action_length, vocab_size))
        return F.softmax(logits + bias)

    with nn.parameter_scope("decoder_state_rule"):
        # (batch_size, max_action_length, embedding_size)
        rule_features = dense(decoder_hidden_states,
                              embedding_size,
                              base_axis=2)

    with nn.parameter_scope("decoder_state_token"):
        # (batch_size, max_action_length, embedding_size)
        token_features = dense(state_and_ctx, embedding_size, base_axis=2)

    with nn.parameter_scope("rule_embedding"):
        # (batch_size, max_action_length, rule_num)
        rule_logits = embed_inverse(rule_features,
                                    rule_num,
                                    embedding_size,
                                    base_axis=2)
        rule_predict = _biased_softmax(rule_logits, rule_num)

    with nn.parameter_scope("gen_action"):
        terminal_gen_action_prob = dense(decoder_hidden_states,
                                         2,
                                         base_axis=2,
                                         activation=F.softmax)

    with nn.parameter_scope("token_embedding"):
        # (batch_size, max_action_length, token_num)
        token_logits = embed_inverse(token_features,
                                     token_num,
                                     embedding_size,
                                     base_axis=2)
        token_predict = _biased_softmax(token_logits, token_num)

    with nn.parameter_scope("copy_token"):
        # (batch_size, max_action_length, max_query_length)
        copy_prob = pointer_net(query_embed, query_embed_mask, state_and_ctx,
                                hidden_size)

    return rule_predict, terminal_gen_action_prob, token_predict, copy_prob
コード例 #3
0
ファイル: cnn_model_031.py プロジェクト: kzky/works
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    """Consistency loss between two predictions, each with its own
    log-variance.

    With ``s0 = exp(log_var0)`` and ``s1 = exp(log_var1)`` the result is
    ``0.5 * mean(se * (1/s0 + 1/s1) + (s0/s1 + s1/s0))`` where ``se`` is the
    squared error between the two softmax outputs.
    """
    # TODO: squared error/absolute error
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    prob0 = F.softmax(pred0)
    prob1 = F.softmax(pred1)
    sq_err = F.squared_error(prob0, prob1)
    with nn.context_scope(ctx):
        error_term = sq_err * (1 / var0 + 1 / var1)
        ratio_term = var0 / var1 + var1 / var0
        loss_sr = F.mean(error_term + ratio_term) * 0.5
    return loss_sr
コード例 #4
0
ファイル: cnn_model_031.py プロジェクト: kzky/works
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    """Return half the mean of the variance-weighted squared error plus the
    variance-ratio terms.

    Only the final combination and reduction are created inside the
    ``ctx`` context scope; the exponentials, softmaxes and squared error
    are built before entering it.
    """
    # TODO: squared error/absolute error
    sigma0 = F.exp(log_var0)
    sigma1 = F.exp(log_var1)
    diff_sq = F.squared_error(F.softmax(pred0), F.softmax(pred1))
    with nn.context_scope(ctx):
        inverse_sum = 1 / sigma0 + 1 / sigma1
        ratio_sum = sigma0 / sigma1 + sigma1 / sigma0
        per_element = diff_sq * inverse_sum + ratio_sum
        loss_sr = F.mean(per_element) * 0.5
    return loss_sr
コード例 #5
0
ファイル: cnn_model_024.py プロジェクト: kzky/works
def er_loss(ctx, pred):
    """Entropy of ``softmax(pred)``, averaged over every element of ``pred``.

    Args:
        ctx: nnabla context the graph is built under.
        pred: Variable of logits; softmax is taken over its default axis.

    Returns:
        ``-sum(p * log(p)) / (pred.shape[0] * prod(pred.shape[1:]))`` with
        ``p = softmax(pred)``.
    """
    with nn.context_scope(ctx):
        bs = pred.shape[0]
        d = np.prod(pred.shape[1:])
        denominator = bs * d
        pred_normalized = F.softmax(pred)
        # Reuse the softmax node instead of building a second identical one.
        pred_log_normalized = F.log(pred_normalized)
        loss_er = -F.sum(pred_normalized * pred_log_normalized) / denominator
    return loss_er
コード例 #6
0
ファイル: cnn_model_060.py プロジェクト: kzky/works
def er_loss(ctx, pred):
    """Entropy of ``softmax(pred)``, averaged over every element of ``pred``.

    Args:
        ctx: nnabla context the graph is built under.
        pred: Variable of logits; softmax is taken over its default axis.

    Returns:
        ``-sum(p * log(p)) / (pred.shape[0] * prod(pred.shape[1:]))`` with
        ``p = softmax(pred)``.
    """
    with nn.context_scope(ctx):
        bs = pred.shape[0]
        d = np.prod(pred.shape[1:])
        denominator = bs * d
        pred_normalized = F.softmax(pred)
        # Reuse the softmax node instead of building a second identical one.
        pred_log_normalized = F.log(pred_normalized)
        loss_er = -F.sum(pred_normalized * pred_log_normalized) / denominator
    return loss_er
コード例 #7
0
def psm_net(left, right, maxdisp, training):
    """Build the stereo matching network for a left/right image pair.

    Returns ``(pred1, pred2, pred3)`` when ``training`` is truthy and only
    ``pred3`` otherwise. ``training`` also selects batch statistics for the
    normalisation layers.
    """
    print(training)
    batch_stat = True if training else False

    def _regress_disparity(cost, scope):
        # Upsample the cost volume, normalise over axis 2 and regress the
        # disparity, all under the given parameter scope.
        with nn.parameter_scope(scope):
            upsampled = upsample(cost, 4, True)
            prob = F.softmax(upsampled, axis=2)
            return disparityregression(prob, maxdisp)

    # feature extraction
    ref_features = feature_extraction(left, batch_stat, training)
    target_features = feature_extraction(right, batch_stat, training)

    # matching
    volume = build_cost_volume(ref_features, target_features, maxdisp)

    base = dres0(volume, batch_stat)
    base = dres1(base, batch_stat) + base

    # Three stacked hourglasses, each with a residual link to `base`.
    stage1, pre1, post1 = hourglass(base, None, None, batch_stat)
    stage1 = stage1 + base

    stage2, pre2, post2 = hourglass(stage1, pre1, post1, batch_stat)
    stage2 = stage2 + base

    stage3, pre3, post3 = hourglass(stage2, pre1, post2, batch_stat)
    stage3 = stage3 + base

    # Classification heads accumulate the previous head's output.
    cost1 = classif1(stage1, batch_stat)
    cost2 = classif2(stage2, batch_stat) + cost1
    cost3 = classif3(stage3, batch_stat) + cost2

    if training:
        pred1 = _regress_disparity(cost1, 'cost1_upsample')
        pred2 = _regress_disparity(cost2, 'cost2_upsample')

    pred3 = _regress_disparity(cost3, 'cost3_upsample')

    if not training:
        return pred3
    return pred1, pred2, pred3
コード例 #8
0
 def __init__(self, parents):
     """Assemble a small graph: two inputs, one convolution and a join.

     A join parameter of length ``len(parents) + 3`` is created; its
     softmax supplies the ``eval_prob`` of the inputs and the convolution.
     """
     smo.Graph.__init__(self, parents=parents)

     n_join_inputs = len(parents) + 3
     join_param = Parameter(shape=(n_join_inputs, ))
     join_prob = F.softmax(join_param)

     # First input; its probability combines the first two join entries.
     self.append(
         smo.Input(name='input_1',
                   value=nn.Variable((10, 20, 32, 32)),
                   eval_prob=join_prob[0] + join_prob[1]))

     # Convolution stacked on the module appended just above.
     previous = self[-1]
     self.append(
         smo.Conv(name='conv',
                  parents=[previous],
                  in_channels=20,
                  out_channels=20,
                  kernel=(3, 3),
                  pad=(1, 1),
                  eval_prob=join_prob[0]))

     # Second, independent input.
     self.append(
         smo.Input(name='input_2',
                   value=nn.Variable((10, 20, 32, 32)),
                   eval_prob=join_prob[2]))

     # Join over the external parents and every module added so far.
     join_parents = parents + list(self)
     self.append(
         smo.Join(name='join',
                  parents=join_parents,
                  mode='linear',
                  join_parameters=join_param))
コード例 #9
0
ファイル: model.py プロジェクト: sony/ai-research-code
 def __call__(self, features):
     """Fuse multi-scale features and produce a segmentation map.

     Every feature map is resized to the spatial size of ``features[0]``,
     concatenated on axis 1, passed through two 1x1 convolutions and
     resized to ``self.output_size``. In test mode the result is
     softmax-normalised over axis 1.
     """

     def _resize(tensor, size):
         return F.interpolate(tensor,
                              output_size=size,
                              mode='linear',
                              align_corners=False,
                              half_pixel=True)

     resized = []
     for feature in features:
         resized.append(_resize(feature, features[0].shape[2:]))

     fused = F.concatenate(*resized, axis=1)
     hidden = self.conv2d(fused,
                          self.hparams['channels'],
                          kernel_size=1,
                          stride=1,
                          bias=False,
                          name='convs/0/conv')
     hidden = self.batch_norm(hidden, name='convs/0/bn')
     hidden = F.relu(hidden)
     logits = self.conv2d(hidden,
                          self.hparams['num_classes'],
                          kernel_size=1,
                          stride=1,
                          bias=True,
                          name='conv_seg')
     logits = _resize(logits, self.output_size)

     if not self.test:
         return logits
     return F.softmax(logits, axis=1)
コード例 #10
0
def attn_block(x, name, num_heads=4, fix_parameters=False):
    """Multihead self-attention block with a residual connection.

    ``x`` is unpacked as ``(B, C, H, W)``; the returned variable has the
    same shape as ``x``.
    """
    B, C, H, W = x.shape
    # Per-head layout: heads folded into the batch axis, pixels flattened.
    head_shape = (B * num_heads, -1, H * W)

    with nn.parameter_scope(name):
        # Get query, key, value
        normed = normalize(x, name="norm")
        # nin(3 * C) -> split is faster?
        query = nin(normed, C, name="q")
        key = nin(normed, C, name="k")
        value = nin(normed, C, name="v")

        # Attention scores, scaled by C ** -0.5
        scores = F.batch_matmul(F.reshape(query, head_shape),
                                F.reshape(key, head_shape),
                                transpose_a=True)
        scores = F.mul_scalar(scores, int(C)**(-0.5), inplace=True)

        assert scores.shape == (B * num_heads, H * W, H * W)
        weights = F.softmax(scores, axis=-1)

        # NOTE(review): this product contracts over the first axis of
        # `weights` while the softmax normalises the last one; confirm
        # that this is the intended orientation.
        attended = F.batch_matmul(F.reshape(value, head_shape), weights)
        attended = F.reshape(attended, (B, C, H, W))

        # output projection
        attended = nin(attended, C, name='proj_out', zeroing_w=True)

    assert attended.shape == x.shape
    return F.add2(attended, x, inplace=True)
コード例 #11
0
ファイル: cnn_model_081.py プロジェクト: kzky/works
def kl_divergence(ctx, pred, label, log_var):
    """Return ``-mean(sum(target * log(softmax(pred)), axis=1))``.

    ``target`` is ``softmax_with_temperature(ctx, label, s)`` with
    ``s = exp(log_var) ** 0.5``.
    """
    with nn.context_scope(ctx):
        std = F.pow_scalar(F.exp(log_var), 0.5)
        target = softmax_with_temperature(ctx, label, std)
        log_prob = F.log(F.softmax(pred, axis=1))
        per_sample = F.sum(target * log_prob, axis=1)
        loss = -F.mean(per_sample)
    return loss
コード例 #12
0
ファイル: cnn_model_063.py プロジェクト: kzky/works
def kl_divergence(ctx, pred, label, log_var):
    """Negated batch mean of the target-weighted log-probabilities.

    The target distribution comes from ``softmax_with_temperature`` applied
    to ``label`` with temperature ``sqrt(exp(log_var))``; the
    log-probabilities are ``log(softmax(pred, axis=1))``.
    """
    with nn.context_scope(ctx):
        temperature = F.pow_scalar(F.exp(log_var), 0.5)
        soft_target = softmax_with_temperature(ctx, label, temperature)
        weighted = soft_target * F.log(F.softmax(pred, axis=1))
        summed = F.sum(weighted, axis=1)
        loss = -F.mean(summed)
    return loss
コード例 #13
0
    def random_generate(self, num_images, path):
        """Generate images from random latent codes and save them under ``path``.

        Two sets of ``num_images`` images are produced:

        * codes drawn uniformly with ``F.randint``, saved as
          ``generate_uniform.png``;
        * codes sampled position by position from the output of
          ``self.prior`` conditioned on random one-hot labels, saved as
          ``generate_pixelcnn.png``.

        The sampled index grid and the random labels are printed.
        """

        # Generate from the uniform prior of the base model
        indices = F.randint(low=0,
                            high=self.num_embedding,
                            shape=[num_images] + self.latent_shape)
        indices = F.reshape(indices, (-1, ), inplace=True)
        # Look up the code vectors, then move the embedding axis to axis 1.
        quantized = F.embed(indices, self.base_model.vq.embedding_weight)
        quantized = F.transpose(
            quantized.reshape([num_images] + self.latent_shape +
                              [quantized.shape[-1]]), (0, 3, 1, 2))

        img_gen_uniform_prior = self.base_model(quantized,
                                                quantized_as_input=True,
                                                test=True)

        # Generate images using pixelcnn prior
        # Start from an all-zero index grid that is filled in below.
        indices = nn.Variable.from_numpy_array(
            np.zeros(shape=[num_images] + self.latent_shape))
        labels = F.randint(low=0, high=self.num_classes, shape=(num_images, 1))
        labels = F.one_hot(labels, shape=(self.num_classes, ))

        # Sample from pixelcnn - pixel by pixel
        import torch  # Numpy behavior is different and not giving correct output
        for i in range(self.latent_shape[0]):
            for j in range(self.latent_shape[1]):
                # Re-embed the current (partially filled) index grid.
                quantized = F.embed(indices.reshape((-1, )),
                                    self.base_model.vq.embedding_weight)
                quantized = F.transpose(
                    quantized.reshape([num_images] + self.latent_shape +
                                      [quantized.shape[-1]]), (0, 3, 1, 2))
                indices_sample = self.prior(quantized, labels)
                # Keep only the prior output at position (i, j).
                indices_prob = F.reshape(indices_sample,
                                         indices.shape +
                                         (indices_sample.shape[-1], ),
                                         inplace=True)[:, i, j]
                indices_prob = F.softmax(indices_prob)

                # Draw one index per image from the categorical distribution.
                indices_prob_tensor = torch.from_numpy(indices_prob.d)
                sample = indices_prob_tensor.multinomial(1).squeeze().numpy()
                indices[:, i, j] = sample

        print(indices.d)
        # Decode the fully sampled index grid.
        quantized = F.embed(indices.reshape((-1, )),
                            self.base_model.vq.embedding_weight)
        quantized = F.transpose(
            quantized.reshape([num_images] + self.latent_shape +
                              [quantized.shape[-1]]), (0, 3, 1, 2))

        img_gen_pixelcnn_prior = self.base_model(quantized,
                                                 quantized_as_input=True,
                                                 test=True)

        self.save_image(img_gen_uniform_prior,
                        os.path.join(path, 'generate_uniform.png'))
        self.save_image(img_gen_pixelcnn_prior,
                        os.path.join(path, 'generate_pixelcnn.png'))

        # Recover the class ids from the one-hot labels for display.
        print('Random labels generated for pixelcnn prior:',
              list(F.max(labels, axis=1, only_index=True).d))
コード例 #14
0
def network(x, test=False):
    """Two-convolution classifier ending in a 2-way softmax.

    The shape annotations below assume an input of 1x128x128. ``test`` is
    accepted but not used by any layer.
    """
    # Input: x -> 1,128,128
    # ImageAugmentation
    augmented = F.image_augmentation(x, (1, 128, 128), (0, 0), 1, 1, 0, 1, 0,
                                     False, False, 0, False, 1, 0.5, False, 0)

    # Convolution -> 16,124,124, then ReLU
    conv1 = PF.convolution(augmented, 16, (5, 5), (0, 0), name='Convolution')
    conv1 = F.relu(conv1, True)
    # MaxPooling -> 16,62,62
    pool1 = F.max_pooling(conv1, (2, 2), (2, 2))

    # Convolution_2 -> 30,60,60
    conv2 = PF.convolution(pool1, 30, (3, 3), (0, 0), name='Convolution_2')
    # MaxPooling_2 -> 30,30,30, then Tanh_2
    pool2 = F.max_pooling(conv2, (2, 2), (2, 2))
    pool2 = F.tanh(pool2)

    # Affine -> 150, then ReLU_2
    hidden = PF.affine(pool2, (150, ), name='Affine')
    hidden = F.relu(hidden, True)

    # Affine_2 -> 2, then Softmax
    logits = PF.affine(hidden, (2, ), name='Affine_2')
    return F.softmax(logits)
コード例 #15
0
    def compute_context(prev_state):
        """Return the attention context vector for ``prev_state``.

        The state is projected to ``attention_units``, combined with the
        enclosing scope's ``hs``, scored per source position and
        softmax-normalised over axis 1. The weights are then used to pool
        ``hs``.
        """
        batch_size = prev_state.shape[0]
        tiled_shape = (batch_size, sentence_length_source, attention_units)

        # (batch_size, attention_units)
        projected = PF.affine(prev_state,
                              attention_units,
                              with_bias=False,
                              name='Waht')
        # (batch_size, 1, attention_units)
        projected = F.reshape(projected, (batch_size, 1, attention_units))
        # (batch_size, sentence_length_source, attention_units)
        projected = F.broadcast(projected, tiled_shape)

        # (batch_size, sentence_length_source, attention_units)
        energy = F.tanh(hs + projected)
        # (batch_size, sentence_length_source, 1)
        score = time_distributed(PF.affine)(energy,
                                            1,
                                            with_bias=False,
                                            name='attention')
        # Normalise over the source positions.
        weights = F.softmax(score, axis=1)

        pooled = F.batch_matmul(hs, weights, transpose_a=True)
        return F.reshape(pooled, (batch_size, attention_units))
コード例 #16
0
def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Self-attention block with a learnable residual gate.

    ``h`` is unpacked as ``(b, c, s0, s1)``. The attention output is scaled
    by the parameter ``gamma`` (created with a constant 0 initializer) and
    added to ``h``.
    """
    b, c, s0, s1 = h.shape
    reduced = c // r
    assert reduced > 0

    def _conv1x1(outmaps, name):
        # Bias-free 1x1 convolution applied to the block input.
        return convolution(h, outmaps, kernel=(1, 1), pad=(0, 0),
                           stride=(1, 1), name=name,
                           with_bias=False, sn=sn, test=test)

    # 1x1 convolutions
    f_x = _conv1x1(reduced, "f")
    g_x = _conv1x1(reduced, "g")
    h_x = _conv1x1(c, "h")

    # Attend
    f_flat = f_x.reshape([b, reduced, -1])
    g_flat = g_x.reshape([b, reduced, -1])
    attn = F.softmax(F.batch_matmul(f_flat, g_flat, transpose_a=True), 1)
    h_flat = h_x.reshape([b, c, -1])
    attended = F.reshape(F.batch_matmul(h_flat, attn), [b, c, s0, s1])

    # Shortcut
    gamma = get_parameter_or_create(
        "gamma", [1, 1, 1, 1], ConstantInitializer(0.), not fix_parameters)
    return gamma * attended + h
コード例 #17
0
def softmax_cross_entropy_backward(inputs, axis=None):
    """Gradient of softmax cross entropy with respect to the logits.

    Args:
      inputs (list of nn.Variable): ``[dy, x0, t0]``: the incoming gradient,
        the logits and the integer labels of the forward function.
      axis (int): Class axis of ``x0``; normalised with ``positive_axis``.

    Return:
      tuple: ``(dx0, None)`` where ``dx0 = dy * (softmax(x0) - one_hot(t0))``.
      No gradient is produced for the labels.
    """
    dy = inputs[0]
    x0 = inputs[1]
    t0 = inputs[2]

    D = len(x0.shape)
    axis = positive_axis(axis, D)
    c0 = x0.shape[axis]
    # Shape of the labels without the class axis. Only the class axis is
    # dropped; filtering out every size-1 dimension (as before) also removed
    # a batch or spatial dimension of size 1 and produced a one-hot tensor
    # that no longer matched `x0`.
    label_shape = [s for i, s in enumerate(x0.shape) if i != axis]
    u0 = F.reshape(t0, (-1, 1), inplace=False)
    u1 = F.one_hot(u0, (c0, ))
    # One-hot labels with the class dimension last.
    to = F.reshape(u1, label_shape + [c0])
    # NOTE(review): the original assigned `no_grad(to)` to an unused name,
    # so the one-hot tensor was never actually detached; that dead store is
    # dropped here. Confirm whether detaching was intended.
    if axis != D - 1:
        # Move the class dimension from the end back to `axis`.
        oaxes = list(range(D - 1))
        taxes = oaxes[:axis] + [D - 1] + oaxes[axis:]
        to = F.transpose(to, taxes)
    dx0 = dy * (F.softmax(x0, axis=axis) - to)
    return dx0, None
コード例 #18
0
ファイル: ops.py プロジェクト: shikisawamura/nnabla-examples
def Bahdanau_attention(query, values, out_features, scope):
    r"""Additive (Bahdanau) attention.

    Args:
        query (nn.Variable): A query of size (B, 1, C).
        values (nn.Variable): Values of size (B, T, C).
        out_features (int): The projected dimensionality.
        scope (str): Parameter scope.

    Returns:
        nn.Variable: The context vector.
        nn.Variable: The attention weight vector.
    """
    batch_size = query.shape[0]

    with nn.parameter_scope(scope):
        projected_query = PF.affine(query, out_features, base_axis=2,
                                    with_bias=False, name='query')
        projected_values = PF.affine(values, out_features, base_axis=2,
                                     with_bias=False, name='values')
        energy = F.tanh(projected_query + projected_values)
        # scores of shape (B, T, 1)
        scores = PF.affine(energy, 1, base_axis=2,
                           with_bias=False, name='scores')
        # normalise over T, then lay out as (B, 1, T)
        normalized = F.softmax(scores, axis=1)
        attention_weights = normalized.reshape((batch_size, 1, -1))
        # (B, 1, T) x (B, T, C) -> (B, 1, C)
        context_vector = F.batch_matmul(attention_weights, values)

    return context_vector, attention_weights
コード例 #19
0
def yolov2_activate(x, anchors, biases):
    """Apply the YOLOv2 output activations to a raw prediction map.

    ``x`` is reshaped to ``(N, anchors, -1, ...)`` and axis 2 is split into
    xy (entries 0-1), wh (2-3), objectness (4) and class scores (5 onward).
    These are passed through sigmoid, exp, sigmoid and softmax respectively,
    converted with ``yolov2_image_coordinate`` and concatenated again.

    Args:
        x: Raw network output; the code slices it as a 4-D map after the
            anchor split (5-D slice starts are used).
        anchors (int): Number of anchors folded into axis 1 of ``x``.
        biases: Forwarded unchanged to ``yolov2_image_coordinate``.

    Returns:
        Variable of shape ``(N, -1, x.shape[1] // anchors)``.
    """
    shape = x.shape
    y = F.reshape(x, (
        shape[0],
        anchors,
        -1,
    ) + shape[2:])
    stop = list(y.shape)
    stop[2] = 2
    t_xy = F.slice(y, (0, 0, 0, 0, 0), stop)
    stop[2] = 4
    t_wh = F.slice(y, (0, 0, 2, 0, 0), stop)
    stop[2] = 5
    t_o = F.slice(y, (0, 0, 4, 0, 0), stop)
    stop[2] = y.shape[2]
    t_p = F.slice(y, (0, 0, 5, 0, 0), stop)
    t_xy = F.sigmoid(t_xy)
    t_wh = F.exp(t_wh)
    t_o = F.sigmoid(t_o)
    t_p = F.softmax(t_p, axis=2)
    t_x, t_y, t_wh = yolov2_image_coordinate(t_xy, t_wh, biases)
    y = F.concatenate(t_x, t_y, t_wh, t_o, t_p, axis=2)
    # Floor division keeps the reshape dimension an int on Python 3; true
    # division would hand a float to reshape.
    y = F.transpose(y, (0, 1, 3, 4, 2)).reshape(
        (shape[0], -1, shape[1] // anchors))
    return y
コード例 #20
0
def network(x, y, test=False):
    """Small convolutional classifier ending in a 2-way softmax.

    The shape annotations below assume an input of 3x64x64. ``y`` is
    accepted but not used; ``test`` switches batch normalization to
    inference mode (``batch_stat = not test``).
    """
    # Input: x -> 3,64,64
    # AveragePooling -> 3,12,21
    pooled = F.average_pooling(x, (5, 3), (5, 3))
    # LeakyReLU_2
    pooled = F.leaky_relu(pooled, 0.1, True)

    # Convolution_2 -> 20,13,21
    features = PF.convolution(pooled, 20, (2, 3), (1, 1),
                              name='Convolution_2')
    # BatchNormalization
    features = PF.batch_normalization(features, (1, ), 0.9, 0.0001, not test,
                                      name='BatchNormalization')
    # ReLU
    features = F.relu(features, True)

    # DepthwiseConvolution
    features = PF.depthwise_convolution(features, (5, 5), (2, 2),
                                        name='DepthwiseConvolution')
    # MaxPooling_2 -> 20,6,7
    features = F.max_pooling(features, (2, 3), (2, 3))
    # LeakyReLU
    features = F.leaky_relu(features, 0.1, True)

    # Affine -> 2
    logits = PF.affine(features, (2, ), name='Affine')
    # Softmax
    return F.softmax(logits)
コード例 #21
0
ファイル: main.py プロジェクト: miumiu0917/nnabla-cifar10
def convolution(x):
    """Three conv+ReLU layers, two affine+ReLU layers and a 10-way softmax.

    ``x`` is first reshaped to
    ``[BATCH_SIZE, IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH]``.
    """
    output = x.reshape([BATCH_SIZE, IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])

    # (scope, output maps, kernel, stride); padding is (1, 1) for all three.
    conv_layers = (
        ("conv1", 16, (5, 5), (2, 2)),
        ("conv2", 32, (3, 3), (1, 1)),
        ("conv3", 64, (3, 3), (1, 1)),
    )
    for scope, outmaps, kernel, stride in conv_layers:
        with nn.parameter_scope(scope):
            output = PF.convolution(output, outmaps, kernel,
                                    stride=stride, pad=(1, 1))
            output = F.relu(output)

    # Flatten everything except the batch axis.
    output = output.reshape([BATCH_SIZE, int(output.size / BATCH_SIZE)])

    for scope, units in (("fc1", 1024), ("fc2", 256)):
        with nn.parameter_scope(scope):
            output = PF.affine(output, units)
            output = F.relu(output)

    with nn.parameter_scope("softmax"):
        output = PF.affine(output, 10)
        output = F.softmax(output)

    return output
コード例 #22
0
def mlp(image, test=False):
    """Classify ``image`` into 10 classes with three convolutions and two
    affine layers.

    Args:
        image: Input variable; it is divided by 255.0 before the first layer.
        test (bool): Accepted but not used by any layer.

    Returns:
        Softmax probabilities over the 10 outputs of ``fc4``.
    """
    image /= 255.0
    c1 = F.relu(PF.convolution(image, 32, (3, 3), name='conv1'), inplace=True)
    c2 = F.relu(PF.convolution(c1, 128, (3, 3), name='conv2'), inplace=True)
    c3 = F.relu(PF.convolution(c2, 256, (3, 3), name='conv3'), inplace=True)
    c4 = F.relu(PF.affine(c3, 512, name='fc3'), inplace=True)
    # Feed the fc3 hidden layer into the classifier. The original passed
    # `c3`, which left `c4` (and the fc3 parameters) computed but unused.
    c5 = PF.affine(c4, 10, name='fc4')
    return F.softmax(c5)
コード例 #23
0
def detect_keypoint(x, block_expansion, num_kp, num_channels, max_features,
                    num_blocks, temperature, estimate_jacobian=False, scale_factor=1,
                    single_jacobian_map=False, pad=0,
                    test=False, comm=None):
    """Detect keypoints and, optionally, a 2x2 jacobian per keypoint.

    Returns the dictionary produced by ``gaussian2kp`` on the
    softmax-normalised heatmaps. When ``estimate_jacobian`` is true, a
    ``'jacobian'`` entry is added to it.
    """
    if scale_factor != 1:
        x = anti_alias_interpolate(x, num_channels, scale_factor)

    with nn.parameter_scope("hourglass"):
        feature_map = hourglass(x, block_expansion, num_blocks=num_blocks,
                                max_features=max_features, test=test, comm=comm)

    with nn.parameter_scope("keypoint_detector"):
        inmaps = feature_map.shape[1]
        outmaps = num_kp
        # Uniform initialisation ranges derived from the He-forward std.
        w_bound = I.calc_normal_std_he_forward(
            inmaps, outmaps, kernel=(7, 7)) / np.sqrt(2.)
        b_bound = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
        prediction = PF.convolution(
            feature_map, outmaps=num_kp,
            kernel=(7, 7), pad=(pad, pad),
            w_init=I.UniformInitializer((-w_bound, w_bound)),
            b_init=I.UniformInitializer((-b_bound, b_bound)))

    final_shape = prediction.shape
    batch, channels = final_shape[0], final_shape[1]

    # Softmax over the flattened spatial positions of every keypoint map.
    heatmap = F.reshape(prediction, (batch, channels, -1))
    heatmap = F.softmax(heatmap / temperature, axis=2)
    heatmap = F.reshape(heatmap, final_shape, inplace=False)

    out = gaussian2kp(heatmap)  # {"value": value}, keypoint positions.

    if not estimate_jacobian:
        return out

    num_jacobian_maps = 1 if single_jacobian_map else num_kp

    with nn.parameter_scope("jacobian_estimator"):
        # Zero weights and a [1, 0, 0, 1] bias per jacobian map.
        jacobian_map = PF.convolution(
            feature_map,
            outmaps=4*num_jacobian_maps,
            kernel=(7, 7), pad=(pad, pad),
            w_init=I.ConstantInitializer(0),
            b_init=np.array([1, 0, 0, 1]*num_jacobian_maps))

    jacobian_map = F.reshape(
        jacobian_map,
        (batch, num_jacobian_maps, 4, final_shape[2], final_shape[3]))
    # Insert a singleton axis so the heatmap broadcasts over the 4 entries.
    expanded_shape = heatmap.shape[:2] + (1,) + heatmap.shape[2:]
    heatmap = F.reshape(heatmap, expanded_shape, inplace=False)

    # Heatmap-weighted sum of the jacobian maps over the spatial axes.
    jacobian = F.sum(heatmap * jacobian_map, axis=(3, 4))
    jacobian = F.reshape(
        jacobian, (jacobian.shape[0], jacobian.shape[1], 2, 2), inplace=False)
    out['jacobian'] = jacobian  # jacobian near each keypoint.

    # out is a dictionary containing {"value": value, "jacobian": jacobian}
    return out
コード例 #24
0
    def backward_impl(self, inputs, outputs, prop_down, accum):
        """Propagate gradients through the backward function (double backward).

        Reads ``inputs`` as ``[x0 (logits), t0 (labels), dz (grad_input)]``
        and writes into ``inputs[0].grad`` and ``inputs[2].grad`` when the
        matching ``prop_down`` flag is set. The matching ``accum`` flag
        selects between adding to the existing gradient and overwriting it.
        No gradient is computed for the labels.
        """
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]
        # Inputs
        x0 = inputs[0].data  # logits
        t0 = inputs[1].data  # labels
        dz = inputs[2].data  # grad_input
        # Outputs
        dx0 = outputs[0].data
        dt0 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_t0 = inputs[1].grad
        g_dz = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dt0 = outputs[1].grad

        # Computation
        # Gradient w.r.t. x0
        if prop_down[0]:
            # gradient is the backward of softmax with (g_dx0 * dz) as in-coming gradient
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            # Start from a zero gradient so si.grad holds only this pass.
            si.grad.fill(0.0)
            so = F.softmax(si, axis)
            # Run forward explicitly when auto-forward mode is off.
            if not nn.get_auto_forward():
                so.forward()
            so.backward(g_dx0 * dz, clear_buffer=False)
            g_x0_ = si.grad
            if accum[0]:
                g_x0 += g_x0_
            else:
                g_x0.copy_from(g_x0_)

        # Gradient w.r.t. t0 is not required

        # Gradient w.r.t. dz
        if prop_down[2]:
            # Unstable alternative (uses `/ dz`), kept for reference only:
            ## g_dz_ = g_dx0 * dx0 / dz
            ## g_dz_ = F.sum(g_dz_, axis)

            # NOTE(review): if `dz.shape` is a tuple, comparing it with the
            # list `[]` is always unequal, so the `[1]` fallback would never
            # be taken. Confirm how a scalar `dz` is meant to be handled.
            shape = dz.shape if dz.shape != [] else [1]
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            ti = nn.Variable(t0.shape).apply(data=t0)
            o = nn.Variable(shape)
            # Backpropagate a unit gradient through the forward function so
            # that si.grad holds its gradient w.r.t. the logits.
            o.grad.fill(1.0)
            self.forward_func.backward([si, ti], [o], [False, False])

            # Sum g_dx0_i * (y_hat_i - y_i) over i
            g_dz_ = F.sum(g_dx0 * si.grad, axis)
            if accum[2]:
                g_dz += g_dz_
            else:
                g_dz.copy_from(g_dz_)
コード例 #25
0
def net(n_class,
        xs,
        xq,
        init_type='nnabla',
        embedding='conv4',
        net_type='prototypical',
        distance='euclid',
        test=False):
    '''
    Similarity net function
        This function implements the network with settings as specified.

        Args:
            n_class (int): number of classes. Typical setting is 5 or 20.
            xs (~nnabla.Variable): support images.
            xq (~nnabla.Variable): query images.
            init_type (str, optional): initialization type for weights and bias parameters. See conv_initializer function.
            embedding(str, optional): embedding network. Only 'conv4' is supported.
            net_type (str, optional): 'matching' or 'prototypical'.
            distance (str, optional): similarity metric to use. See similarity function.
            test (bool, optional): switch flag for training dataset and test dataset
        Returns:
            h (~nnabla.Variable): output variable indicating similarity between support and query.
        Raises:
            ValueError: if ``embedding`` or ``net_type`` is not supported.
    '''

    # Number of support samples per class. Floor division keeps it an int,
    # because it is used as a reshape dimension below.
    n_shot = xs.shape[0] // n_class

    # feature embedding for supports and queries
    if embedding == 'conv4':
        fs = conv4(xs, test, init_type)  # tensor of (n_support, fdim)
        fq = conv4(xq, test, init_type)  # tensor of (n_query, fdim)
    else:
        raise ValueError("unsupported embedding: {!r}".format(embedding))

    if net_type == 'matching':
        # This example does not include the full-context-embedding of matching networks.
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)
        if 1 < n_shot:
            # Clip before the softmax, then average the probabilities of the
            # shots that belong to the same class.
            h = F.minimum_scalar(F.maximum_scalar(h, -35), 35)
            h = F.softmax(h)
            h = F.reshape(h, (h.shape[0], n_class, n_shot))
            h = F.mean(h, axis=2)
            # Reverse to logit to use same softmax cross entropy
            h = F.log(h)
    elif net_type == 'prototypical':
        if 1 < n_shot:
            # Class prototype = mean embedding of that class's shots.
            fs = F.reshape(fs, (n_class, n_shot) + fs.shape[1:])
            fs = F.mean(fs, axis=1)
        fs = F.reshape(fs, (1, ) + fs.shape)  # (1, n_way, fdim)
        # (n_way*n_query, 1, fdim)
        fq = F.reshape(fq, (fq.shape[0], 1) + fq.shape[1:])
        h = similarity(fq, fs, distance)
        h = h - F.mean(h, axis=1, keepdims=True)
    else:
        raise ValueError("unsupported net_type: {!r}".format(net_type))

    return h
コード例 #26
0
ファイル: mixedop.py プロジェクト: sony/nnabla-nas
    def call(self, input):
        """Apply the mixed operation to ``input``.

        In ``'full'`` mode every candidate op is evaluated and the results
        are combined with softmax(``self._alpha``) weights. In any other
        mode the active index is refreshed and only that op is applied.
        """
        if self._mode != 'full':
            # update active index
            self._update_active_index()
            return self._ops[self._active](input)

        candidates = [op(input) for op in self._ops]
        stacked = F.stack(*candidates, axis=0)
        weights = F.softmax(self._alpha, axis=0)
        return F.sum(F.mul2(stacked, weights), axis=0)
コード例 #27
0
def network01E(x, y, test=False):
    """Binary-connect network ending in a 10-way softmax.

    The shape annotations below assume an input of 1x64x48. ``y`` is
    accepted but not used; ``test`` switches batch normalization to
    inference mode (``batch_stat = not test``).
    """

    def _batch_norm(v, name):
        # All four normalisation layers share the same hyper-parameters.
        return PF.batch_normalization(v, (1, ), 0.5, 0.01, not test,
                                      name=name)

    # Input: x -> 1,64,48
    # BinaryConnectConvolution -> 64,60,44
    out = PF.binary_connect_convolution(x,
                                        64, (5, 5), (0, 0),
                                        name='BinaryConnectConvolution')
    # MaxPooling -> 64,30,22
    out = F.max_pooling(out, (2, 2), (2, 2))
    # BatchNormalization + BinarySigmoid
    out = F.binary_sigmoid(_batch_norm(out, 'BatchNormalization'))

    # BinaryConnectConvolution_2 -> 64,26,18
    out = PF.binary_connect_convolution(out,
                                        64, (5, 5), (0, 0),
                                        name='BinaryConnectConvolution_2')
    # MaxPooling_2 -> 64,13,9
    out = F.max_pooling(out, (2, 2), (2, 2))
    # BatchNormalization_2 + BinarySigmoid_2
    out = F.binary_sigmoid(_batch_norm(out, 'BatchNormalization_2'))

    # BinaryConnectAffine -> 512
    out = PF.binary_connect_affine(out, (512, ), name='BinaryConnectAffine')
    # BatchNormalization_3 + BinarySigmoid_3
    out = F.binary_sigmoid(_batch_norm(out, 'BatchNormalization_3'))

    # BinaryConnectAffine_2 -> 10
    out = PF.binary_connect_affine(out, (10, ), name='BinaryConnectAffine_2')
    # BatchNormalization_4
    out = _batch_norm(out, 'BatchNormalization_4')

    # Softmax
    return F.softmax(out)
コード例 #28
0
ファイル: model.py プロジェクト: sony/nnabla-examples
def _scaled_dot_product_attention(q, k, v, attn_mask, dropout):
    """Scaled dot-product attention.

    Args:
        q: Queries of shape (B, Nt, E); scaled by ``E ** -0.5``.
        k: Keys, multiplied as (B, E, Ns) via ``transpose_b=True``.
        v: Values of shape (B, Ns, E).
        attn_mask: Optional additive mask applied to the scores before the
            softmax, or ``None``.
        dropout (float): Dropout probability applied to the attention
            weights; no dropout is applied when it is not greater than 0.

    Returns:
        tuple: ``(attn_output, attn_output_weights)``: the attended values
        of shape (B, Nt, E) and the weights used to compute them.
    """
    _, _, E = q.shape
    q *= float(E)**-0.5
    # (B, Nt, E) x (B, E, Ns) -> (B, Nt, Ns)
    attn = F.batch_matmul(q, k, transpose_b=True)
    if attn_mask is not None:
        attn += attn_mask
    attn_output_weights = F.softmax(attn, axis=len(attn.shape) - 1)
    if dropout > 0.0:
        # Drop the normalised weights that feed the output. The original
        # applied dropout to the pre-softmax scores after the weights were
        # already computed and discarded the result, so it had no effect.
        attn_output_weights = F.dropout(attn_output_weights, p=dropout)
    # (B, Nt, Ns) x (B, Ns, E) -> (B, Nt, E)
    attn_output = F.batch_matmul(attn_output_weights, v)
    return attn_output, attn_output_weights
コード例 #29
0
def cnn_network(obs, num_actions, scope):
    """Build a convolutional network with a policy head and a value head.

    All parameters are created under the parameter scope ``scope``.

    Args:
        obs: Input fed to the first convolution.
        num_actions: Number of outputs of the policy head.
        scope: Name of the parameter scope wrapping every layer.

    Returns:
        tuple: ``(policy, value)`` where ``policy`` is the softmax of the
        ``'policy'`` affine layer and ``value`` is the output of the
        single-unit ``'value'`` affine layer.
    """
    # (output maps, kernel, stride, layer name) for each convolution.
    conv_specs = (
        (32, (8, 8), (4, 4), 'conv1'),
        (64, (4, 4), (2, 2), 'conv2'),
        (64, (3, 3), (1, 1), 'conv3'),
    )
    with nn.parameter_scope(scope):
        feat = obs
        for maps, kernel, stride, layer_name in conv_specs:
            conv = PF.convolution(feat, maps, kernel, stride=stride,
                                  name=layer_name)
            feat = F.relu(conv)
        feat = F.relu(PF.affine(feat, 512, name='fc1'))
        # Both heads share the same 512-unit feature.
        logits = PF.affine(feat, num_actions, name='policy')
        policy = F.softmax(logits)
        value = PF.affine(feat, 1, name='value')
    return policy, value
コード例 #30
0
    def backward_impl(self, inputs, outputs, prop_down, accum):
        """Propagate gradients through this backward function.

        Presumably this is the double-backward step of a softmax-like
        function (the code rebuilds ``F.softmax`` and the comments mention
        double backward) -- confirm against the enclosing class.

        Args:
            inputs: Sequence of variables; indices 0, 1 and 2 are read as
                ``x0``, ``y0`` and ``dy`` respectively.
            outputs: Sequence of variables; index 0 is read as ``dx0``.
            prop_down: Flags selecting which input gradients to compute.
                Only indices 0 and 2 are consulted here.
            accum: Flags; ``accum[0]`` chooses between accumulating into and
                overwriting the gradient of ``inputs[0]``, ``accum[2]`` is
                forwarded to ``self.forward_func.backward``.
        """
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        axis = self.forward_func.info.args["axis"]
        # To deal with double_backward index error for cuda in windows:
        # convert a negative axis into its non-negative equivalent.
        if axis < 0:
            axis += inputs[0].ndim

        # Inputs
        x0 = inputs[0].data
        y0 = inputs[1].data
        dy = inputs[2].data
        # Outputs (dx0 is not referenced again below)
        dx0 = outputs[0].data
        # Grads of inputs (g_y0 is not referenced again below)
        g_x0 = inputs[0].grad
        g_y0 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad

        # w.r.t. x0
        if prop_down[0]:
            # gradient is the backward of softmax with (g_dx0 * -sum_i dy_i) as in-coming gradient
            neg_sum_dy = -F.sum(dy, axis, True)
            # Fresh variable holding x0 so the softmax graph built here is
            # local to this call; its grad is zeroed before the backward.
            si = nn.Variable(x0.shape).apply(data=x0, need_grad=True)
            si.grad.fill(0.0)
            so = F.softmax(si, axis)
            # Run the forward pass explicitly when auto-forward is off.
            if not nn.get_auto_forward():
                so.forward()
            so.backward(g_dx0 * neg_sum_dy, clear_buffer=False)
            g_x0_ = si.grad
            # Either add to the existing gradient or overwrite it.
            if accum[0]:
                g_x0 += g_x0_
            else:
                g_x0.copy_from(g_x0_)

        # w.r.t. y0: no gradient is computed in this method
        # (original note: "is the grad-depends").

        # w.r.t. dy
        if prop_down[2]:
            # Delegates to the backward of self.forward_func with g_dx0 as
            # in-coming gradient; the result is written to g_dy.
            # NOTE(review): the original comment called this "the backward
            # of log_softmax" -- which function forward_func wraps is not
            # visible here, confirm.
            lsi = nn.Variable(x0.shape).apply(data=x0,
                                              grad=g_dy,
                                              need_grad=True)
            lso = nn.Variable(x0.shape).apply(data=y0, grad=g_dx0)
            self.forward_func.backward([lsi], [lso], accum=[accum[2]])
コード例 #31
0
def get_conditional_dist(fake_images):
    """Compute class probabilities for images using Inception v3.

    Args:
        fake_images (nn.NdArray): NdArrays representing images.
            Shape must be (B, 3, 299, 299). Must be pre-normalized,
            i.e. its values must lie in [-1., +1.].

    Returns:
        nn.NdArray: Class probabilities of the given images, (B, 1008).
    """
    features = construct_inceptionv3(fake_images)
    # Strange as it looks, 1008 outputs and no bias is the correct
    # configuration for this layer.
    logits = PF.affine(features, 1008, name="Affine", with_bias=False)
    return F.softmax(logits)
コード例 #32
0
ファイル: log_softmax.py プロジェクト: donproc/nnabla
def log_softmax_backward(inputs, axis=None):
    """Build the gradient of log-softmax with respect to its input.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient
        and ``inputs[1]`` is the input of the forward function.
      axis (int): Axis argument of the forward function. It is passed to
        ``F.softmax`` as given and normalized with ``positive_axis`` before
        the summation.

    Return:
      nn.Variable: ``dy - softmax(x0) * sum(dy)``, with the sum taken along
      ``axis`` and the reduced dimension kept.
    """
    grad_out, x = inputs[0], inputs[1]
    probs = F.softmax(x, axis=axis)
    reduce_axis = positive_axis(axis, len(x.shape))
    grad_total = F.sum(grad_out, axis=reduce_axis, keepdims=True)
    return grad_out - probs * grad_total
コード例 #33
0
ファイル: cnn_model_025.py プロジェクト: kzky/works
def attention(k, q, v, div_dim=True, softmax=True):
    """Apply dot-product attention over flattened inputs.

    ``k`` and ``q`` are flattened to 2-D (first axis kept), their pairwise
    products form the coefficient matrix, and the result is reshaped back to
    the original shape of ``v``.

    Args:
        k: Key input; flattened to (k.shape[0], prod(k.shape[1:])).
        q: Query input; flattened to (q.shape[0], prod(q.shape[1:])).
        v: Value input. Only its shape is used; see the review note below.
        div_dim: If True, divide the coefficients by
            ``sqrt(prod(v.shape[1:]))``.
        softmax: If True, apply ``F.softmax`` to the coefficients.

    Returns:
        The attended output, reshaped to the original shape of ``v``.
    """
    v_shape = v.shape
    k = F.identity(k)
    q = F.identity(q)
    k = F.reshape(k, (k.shape[0], np.prod(k.shape[1:])))
    q = F.reshape(q, (q.shape[0], np.prod(q.shape[1:])))
    # NOTE(review): the flattened query is reused as the value, so the
    # caller's ``v`` contributes nothing but its shape. Presumably this
    # function is only called with q and v being the same tensor -- confirm.
    v = q  # F.reshape is inplace
    # Coefficients: q (B, D) x k^T (D, B).
    cf = F.affine(q, F.transpose(k, (1, 0)))
    if div_dim:
        dim = np.prod(v_shape[1:])
        cf /= np.sqrt(dim)
    h = cf
    if softmax:
        h = F.softmax(h)
    # A stray trailing character on this line used to make the whole
    # function a syntax error.
    h = F.affine(h, v)
    h = F.reshape(h, v_shape)
    return h
コード例 #34
0
ファイル: cnn_model_063.py プロジェクト: kzky/works
def softmax_with_temperature(ctx, x, t):
    """Return the softmax along axis 1 of ``x / t``, built under ``ctx``.

    Args:
        ctx: Context passed to ``nn.context_scope``.
        x: Input to be scaled and normalized.
        t: Temperature by which ``x`` is divided.
    """
    with nn.context_scope(ctx):
        scaled = x / t
        return F.softmax(scaled, axis=1)
コード例 #35
0
ファイル: cnn_model_060.py プロジェクト: kzky/works
def kl_divergence(ctx, pred, label):
    """Return the mean of ``-sum(softmax(label) * log(softmax(pred)))``.

    Both softmaxes and the sum are taken along axis 1. Note that only this
    cross-entropy term is computed; no entropy term of ``label`` is
    subtracted.

    Args:
        ctx: Context passed to ``nn.context_scope``.
        pred: Unnormalized predictions.
        label: Unnormalized targets.
    """
    with nn.context_scope(ctx):
        target_prob = F.softmax(label, axis=1)
        log_pred_prob = F.log(F.softmax(pred, axis=1))
        per_sample = F.sum(target_prob * log_pred_prob, axis=1)
        loss = -F.mean(per_sample)
    return loss
コード例 #36
0
ファイル: vat.py プロジェクト: zwsong/nnabla
def distance(y0, y1):
    """
    Kullback-Leibler divergence between two categorical distributions.

    Both arguments are passed through ``F.softmax`` before being handed to
    ``F.kl_multinomial``.
    """
    dist0 = F.softmax(y0)
    dist1 = F.softmax(y1)
    return F.kl_multinomial(dist0, dist1)
コード例 #37
0
ファイル: cnn_model_005_001.py プロジェクト: kzky/works
def sr_loss(ctx, pred0, pred1):
    """Return the mean squared error between the softmaxes of two predictions.

    Args:
        ctx: Context passed to ``nn.context_scope``.
        pred0: First unnormalized prediction.
        pred1: Second unnormalized prediction.
    """
    with nn.context_scope(ctx):
        sq_err = F.squared_error(F.softmax(pred0), F.softmax(pred1))
        return F.mean(sq_err)
コード例 #38
0
ファイル: cnn_model_005_001.py プロジェクト: kzky/works
def ce_loss_soft(ctx, pred, target):
    """Return the cross entropy of ``pred`` against soft targets.

    Both ``pred`` and ``target`` are passed through ``F.softmax``; the
    products are summed along axis 1 and the negated mean is returned.

    Args:
        ctx: Context passed to ``nn.context_scope``.
        pred: Unnormalized predictions.
        target: Unnormalized soft targets.
    """
    with nn.context_scope(ctx):
        # TODO: decide whether to divide or not
        soft_target = F.softmax(target)
        log_prob = F.log(F.softmax(pred))
        per_sample = F.sum(soft_target * log_prob, axis=1)
        loss = -F.mean(per_sample)
    return loss