Example #1
def fast_preprocess_layer(img, input_size, normalize, subtract_means, to_float, mean=MEANS, std=STD):
    ''' Preprocess the image. Uses Paddle ops instead of numpy for speed. Used at inference time. '''

    # NCHW
    img = P.transpose(img, perm=[0, 3, 1, 2])
    img = P.image_resize(img, out_shape=[input_size, input_size], resample="BILINEAR")

    if normalize:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        v = P.create_tensor(dtype='float32')
        P.assign(np.array(std).astype(np.float32), v)
        v = P.reshape(v, (1, 3, 1, 1))
        v = P.expand_as(v, target_tensor=img)
        img = (img - m) / v
    elif subtract_means:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        img = (img - m)
    elif to_float:  # just normalize to [0, 1]
        img = img / 255

    # Convert to RGB format
    img_rgb = P.concat([img[:, 2:3, :, :], img[:, 1:2, :, :], img[:, 0:1, :, :]], axis=1)

    # Return value is in channel order [n, c, h, w] and RGB
    return img_rgb
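The `normalize` branch above uses `expand_as` to broadcast the per-channel mean and std over the whole NCHW batch before subtracting and dividing. A minimal NumPy sketch of the same arithmetic (illustrative only; the resize step is omitted and `mean`/`std` are assumed to be 3-element per-channel lists like `MEANS`/`STD`):

import numpy as np

def preprocess_np(img_nhwc, mean, std):
    # NHWC -> NCHW, subtract the per-channel mean, divide by the per-channel std,
    # then reorder the channel axis BGR -> RGB, mirroring the Paddle graph above.
    img = np.transpose(img_nhwc, (0, 3, 1, 2)).astype("float32")
    m = np.asarray(mean, dtype="float32").reshape(1, 3, 1, 1)
    s = np.asarray(std, dtype="float32").reshape(1, 3, 1, 1)
    img = (img - m) / s
    return img[:, ::-1, :, :]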
Example #2
    def get_target_tensor(self, dis_output, t_real):
        """
        Return the target vector for the binary cross entropy loss computation.

        Args:
            dis_output (tensor): Discriminator outputs.
            t_real (bool): If ``True``, uses the real label as target, otherwise uses the fake label as target.
        
        Returns:
            target (tensor): Target tensor vector.
        """
        if t_real:
            if self.real_label_tensor is None:
                self.real_label_tensor = dg.to_variable(np.ones(dis_output.shape, dtype="float32") * self.real_label)
            return L.expand_as(self.real_label_tensor, dis_output)
        else:
            if self.fake_label_tensor is None:
                self.fake_label_tensor = dg.to_variable(np.ones(dis_output.shape, dtype="float32") * self.fake_label)
            return L.expand_as(self.fake_label_tensor, dis_output)
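A minimal usage sketch, assuming `gan_loss` is a hypothetical instance of the surrounding loss class with `real_label=1.0` and both cached label tensors still `None`; `dg` and `np` are the aliases already used in the example:

with dg.guard():
    dis_out = dg.to_variable(np.zeros((4, 1), dtype="float32"))
    target = gan_loss.get_target_tensor(dis_out, t_real=True)
    # `target` has dis_out's shape and is filled with real_label (1.0),
    # ready to be paired with dis_out in a binary cross-entropy loss.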
Example #3
def index_sample(x, index):
    """Select input value according to index
    
    Args:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(index.shape[0], index.shape[1], 1))
    # generate arange index, shape like index
    # arr_index = layers.arange(start=0, end=layers.cast(layers.shape(x)[0], ), dtype=index.dtype)
    batch_size = layers.cast(layers.shape(index)[0], dtype=index.dtype)
    zero = layers.fill_constant(shape=[1], dtype=index.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=index.dtype, value=1)
    arr_index = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=index.dtype), [1, 2])

    arr_index = layers.expand_as(arr_index, index)
    # generate new index
    new_index = layers.concat([arr_index, index], -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # get output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (-1, x_s[-1] * 2))
    return out
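For reference, the docstring example above can be reproduced with plain NumPy: `np.take_along_axis` performs the same per-row column selection that the `gather_nd` construction implements (illustrative only):

import numpy as np

def index_sample_np(x, index):
    # For each row of `x`, pick the columns listed in the matching row of `index`.
    return np.take_along_axis(np.asarray(x), np.asarray(index), axis=-1)

print(index_sample_np([[1, 2, 3], [4, 5, 6]], [[1, 2], [0, 1]]))
# [[2 3]
#  [4 5]]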
Example #4
    def forward(self, input, target, mask):
        """
        Masked L1 loss computation.

        Args:
            input (tensor): Input tensor.
            target (tensor): Target tensor.
            mask (tensor): Mask to be applied to the output loss.
        Returns:
            (tensor): Loss value.
        """
        mask = L.expand_as(mask, input)
        loss = self.criterion(input * mask, target * mask)
        if self.normalize_over_valid:
            # The loss has been averaged over all pixels.
            # Only average over regions which are valid.
            loss = loss * np.prod(mask.shape) / (L.reduce_sum(mask) + 1e-6)

        return loss
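A tiny numeric check (assumed values, plain NumPy) of the rescaling step: the criterion averages over all elements, so multiplying by `prod(mask.shape) / sum(mask)` turns it into an average over the valid (unmasked) elements only:

import numpy as np

err  = np.array([1.0, 2.0, 3.0, 4.0])     # per-element L1 errors
mask = np.array([1.0, 0.0, 0.0, 0.0])     # only the first element is valid
mean_all   = np.mean(err * mask)                          # 0.25, averaged over all 4
mean_valid = mean_all * err.size / (mask.sum() + 1e-6)    # ~1.0, the valid element's error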
Example #5
File: nn.py  Project: zhangyimi/DDParser
def index_sample(x, index):
    """Select input value according to index
    
    Args:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(len(r_x), -1, 1))
    # generate arange index, shape like index
    arr_index = layers.arange(start=0, end=len(index), dtype=index.dtype)
    arr_index = layers.unsqueeze(arr_index, axes=[1, 2])
    arr_index = layers.expand_as(arr_index, index)
    # generate new index
    new_index = layers.concat((arr_index, index), -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # get output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (*x_s[:dim], -1))
    return out
Example #6
    def forward(self, x):
        b, c, h, w = x.shape

        f_query = reshape(x, (b, -1, h * w))
        f_key = reshape(x, (b, -1, h * w))
        f_key = transpose(f_key, (0, 2, 1))
        f_value = reshape(x, (b, -1, h * w))

        f_similarity = bmm(f_query, f_key)  # [b, c, c]
        f_similarity_max = reduce_max(f_similarity, -1, keep_dim=True)
        f_similarity_max_reshape = expand_as(f_similarity_max, f_similarity)
        f_similarity = f_similarity_max_reshape - f_similarity

        f_similarity = softmax(f_similarity)
        f_similarity = transpose(f_similarity, (0, 2, 1))

        f_attention = bmm(f_similarity, f_value)  # [b, c, h*w]
        f_attention = reshape(f_attention, (b, c, h, w))

        out = self.gamma * f_attention + x
        return out
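Note the `max - similarity` step before the softmax: unlike the usual `similarity - max` stabilization, it negates the energies, so the most similar entry ends up with the smallest attention weight. A small NumPy illustration with assumed values:

import numpy as np

sim = np.array([3.0, 1.0, 0.0])            # raw similarities for one row
inv = sim.max() - sim                      # [0., 2., 3.]
att = np.exp(inv) / np.exp(inv).sum()      # ~[0.035, 0.259, 0.705]
# the largest raw similarity (3.0) receives the smallest weight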
Example #7
    def ohem_conf_loss(self, pred_allboxes_conf, batch_size, labels_neg_mask,
                       labels_pos_mask, labels_pos_index, class_vectors,
                       labels_pos_cid):
        batch_conf = P.reshape(pred_allboxes_conf, (-1, self.num_classes))
        loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0]
        loss_c = P.reshape(loss_c, (batch_size, -1))  # (batch_size, 19248)
        labels_neg_mask = P.concat(labels_neg_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_neg_mask = P.reshape(labels_neg_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        loss_c = labels_neg_mask * loss_c  # keep only the negative-sample losses, (batch_size, 19248)
        sorted_loss_c, loss_idx = P.argsort(loss_c, axis=-1, descending=True)

        labels_pos_mask = P.concat(labels_pos_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_pos_mask = P.reshape(labels_pos_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        num_pos = P.cast(P.reduce_sum(labels_pos_mask, dim=1),
                         'int32')  # (batch_size, )
        num_neg = self.negpos_ratio * num_pos  # (batch_size, )
        neg_topk_mask = []
        for idx in range(batch_size):
            desc = P.range(num_neg[idx],
                           num_neg[idx] - P.shape(labels_pos_mask)[1], -1,
                           'int32')
            neg_topk_mask.append(desc)
        neg_topk_mask = P.concat(neg_topk_mask, axis=0)  # (batch_size*19248, )
        neg_topk_mask = P.reshape(neg_topk_mask,
                                  (batch_size, -1))  # (batch_size, 19248)
        neg_topk_mask = P.cast(neg_topk_mask > 0,
                               'float32')  # (batch_size, 19248)
        sorted_loss_c = neg_topk_mask * sorted_loss_c
        selected_poss = []
        selected_negs = []
        selected_pos_class_vectors = []
        selected_neg_class_vectors = []
        for idx in range(batch_size):
            selected_neg_idx_idx = P.where(sorted_loss_c[idx] > 0)
            selected_neg_idx_idx.stop_gradient = True
            selected_neg_idx = P.gather(loss_idx[idx], selected_neg_idx_idx)
            selected_neg_idx.stop_gradient = True
            selected_neg = P.gather(pred_allboxes_conf[idx], selected_neg_idx)
            selected_neg.stop_gradient = True
            selected_negs.append(selected_neg)
            selected_pos = P.gather(pred_allboxes_conf[idx],
                                    labels_pos_index[idx])
            selected_pos.stop_gradient = True
            selected_poss.append(selected_pos)

            zeros = P.fill_constant(shape=[
                P.shape(selected_neg)[0],
            ],
                                    value=0,
                                    dtype='int32')
            zeros.stop_gradient = True
            selected_neg_class_vector = P.gather(class_vectors, zeros)
            selected_neg_class_vector.stop_gradient = True
            selected_neg_class_vectors.append(selected_neg_class_vector)

            labels_pos_cid.stop_gradient = True
            labels_pos_index[idx].stop_gradient = True
            selected_pos_cid = P.gather(labels_pos_cid[idx],
                                        labels_pos_index[idx])
            selected_pos_cid.stop_gradient = True
            selected_pos_class_vector = P.gather(class_vectors,
                                                 selected_pos_cid)
            selected_pos_class_vector.stop_gradient = True
            selected_pos_class_vectors.append(selected_pos_class_vector)
        selected_negs = P.concat(selected_negs, axis=0)  # (?, 1+80)
        selected_poss = P.concat(selected_poss, axis=0)  # (?, 1+80)
        pred_ = P.concat([selected_negs, selected_poss], axis=0)  # (?, 1+80)
        selected_neg_class_vectors = P.concat(selected_neg_class_vectors,
                                              axis=0)  # (?, 1+80)
        selected_pos_class_vectors = P.concat(selected_pos_class_vectors,
                                              axis=0)  # (?, 1+80)
        labels_ = P.concat(
            [selected_neg_class_vectors, selected_pos_class_vectors],
            axis=0)  # (?, 1+80)

        # softmax cross-entropy
        fenzi = P.exp(pred_)
        fenmu = P.reduce_sum(fenzi, dim=1, keep_dim=True)
        pred_prob = fenzi / P.expand_as(fenmu, target_tensor=fenzi)
        conf_loss = labels_ * (0 - P.log(pred_prob + 1e-9))  # cross-entropy; a tiny constant is added to avoid NaN
        conf_loss = P.reduce_sum(conf_loss)
        return conf_loss
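The closing lines compute softmax cross-entropy by hand: `fenzi`/`fenmu` (numerator/denominator) form the softmax, and the one-hot `labels_` select the negative log-probability. A plain-NumPy restatement with assumed values:

import numpy as np

pred   = np.array([[2.0, 1.0, 0.0]])       # logits for one box
onehot = np.array([[1.0, 0.0, 0.0]])       # its one-hot class vector
prob = np.exp(pred) / np.exp(pred).sum(axis=1, keepdims=True)
ce   = np.sum(onehot * -np.log(prob + 1e-9))   # ~0.4076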
Example #8
    def ghm_c_loss(self, pred_allboxes_conf, labels_pos_mask, labels_neg_mask,
                   class_vectors, labels_pos_cid2):
        labels_pos_cid2 = P.reshape(labels_pos_cid2,
                                    (-1, ))  # [batch_size*num_priors]
        pred_allboxes_conf_r = P.reshape(
            pred_allboxes_conf, (-1, P.shape(pred_allboxes_conf)[2]
                                 ))  # [batch_size*num_priors, num_classes]
        label_prob = P.gather(
            class_vectors,
            labels_pos_cid2)  # one-hot mask, (batch_size*num_priors, num_classes)

        # We can switch to sigmoid activation during training while keeping softmax activation at inference.
        # This works because if one logit has the largest sigmoid value, it also has the largest softmax value.
        pred_prob = P.sigmoid(pred_allboxes_conf_r)
        pred_prob = P.cast(pred_prob, 'float32')

        # Binary cross-entropy loss. prob_neg_loss actually also contains the losses of ignored samples;
        # those should not be counted and are filtered out with the masks below.
        # The sample count becomes batch_size*num_priors*num_classes instead of batch_size*num_priors.
        # For a given prior box (one of batch_size*num_priors), if its ground-truth class is 7, channel 7
        # is a positive sample and the remaining 80 channels of that box are negatives
        # (negatives here do not mean background, but the 80 probabilities on channels other than the true class_id).

        # Gradient norm g: 1 - p for positive samples and p for negative samples.
        pred_prob_copy = P.assign(pred_prob)
        g = (1 - pred_prob_copy) * label_prob + pred_prob_copy * (1 -
                                                                  label_prob)
        labels_pos_mask2 = P.reshape(labels_pos_mask,
                                     (-1, ))  # [batch_size*num_priors]
        labels_neg_mask2 = P.reshape(labels_neg_mask,
                                     (-1, ))  # [batch_size*num_priors]
        labels_pos_mask3 = P.reshape(labels_pos_mask,
                                     (-1, 1))  # [batch_size*num_priors, 1]
        labels_neg_mask3 = P.reshape(labels_neg_mask,
                                     (-1, 1))  # [batch_size*num_priors, 1]
        labels_pos_mask4 = P.expand_as(
            labels_pos_mask3, g)  # [batch_size*num_priors, num_classes]
        labels_neg_mask4 = P.expand_as(
            labels_neg_mask3, g)  # [batch_size*num_priors, num_classes]
        # Set g to -1.0 for ignored samples (cid = -1).
        g = g * (labels_pos_mask4 + labels_neg_mask4) + (-1.0) * (
            1 - labels_pos_mask4 - labels_neg_mask4)
        g.stop_gradient = True
        pred_prob.stop_gradient = False

        # Split the value range [0, 1] of g into k bins.
        k = 5
        epsilon = 1.0 / k  # bin width
        w = 0
        c = P.cast(-0.5 <= g, 'float32') * P.cast(g < epsilon, 'float32')
        w += c * P.reduce_sum(c)
        for i in range(1, k - 1, 1):
            c = P.cast(epsilon * i <= g, 'float32') * P.cast(
                g < epsilon * (i + 1), 'float32')
            w += c * P.reduce_sum(c)
        c = P.cast(epsilon * (k - 1) <= g, 'float32')
        w += c * P.reduce_sum(c)

        # gradient density
        GD = w * k

        # GHM_C_loss
        prob_pos_loss = label_prob * (0 - P.log(pred_prob + 1e-9)) / (
            GD + 1e-9)  # tiny constant added to avoid NaN
        prob_neg_loss = (1 - label_prob) * (
            0 - P.log(1 - pred_prob + 1e-9)) / (GD + 1e-9)  # tiny constant added to avoid NaN
        ghm_c_loss = prob_pos_loss + prob_neg_loss
        ghm_c_loss = P.reduce_sum(ghm_c_loss, dim=1)
        ghm_c_loss = ghm_c_loss * (labels_pos_mask2 + labels_neg_mask2)
        ghm_c_loss = P.reduce_sum(ghm_c_loss)

        return ghm_c_loss
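A rough NumPy sketch (assumed values, simplified binning) of the gradient-density weighting used above: g is the per-sample gradient norm, the [0, 1] range is split into k bins, and each sample's loss is effectively divided by GD = (number of samples in its bin) * k, so samples in crowded (easy) bins are down-weighted:

import numpy as np

k = 5
g = np.array([0.05, 0.10, 0.12, 0.85])          # gradient norms of 4 samples
bins = np.minimum((g * k).astype(int), k - 1)   # which of the k bins each g falls into
counts = np.bincount(bins, minlength=k)         # [3, 0, 0, 0, 1]
GD = counts[bins] * k                           # [15, 15, 15, 5]
weight = 1.0 / GD                               # crowded bins contribute less per sample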