def fast_preprocess_layer(img, input_size, normalize, subtract_means, to_float, mean=MEANS, std=STD):
    '''Preprocess the image. Uses paddle instead of numpy for better speed. Used at inference time.'''
    # NCHW
    img = P.transpose(img, perm=[0, 3, 1, 2])
    img = P.image_resize(img, out_shape=[input_size, input_size], resample="BILINEAR")

    if normalize:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        v = P.create_tensor(dtype='float32')
        P.assign(np.array(std).astype(np.float32), v)
        v = P.reshape(v, (1, 3, 1, 1))
        v = P.expand_as(v, target_tensor=img)
        img = (img - m) / v
    elif subtract_means:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        img = (img - m)
    elif to_float:
        # just scale to [0, 1]
        img = img / 255

    # convert to RGB
    img_rgb = P.concat([img[:, 2:3, :, :], img[:, 1:2, :, :], img[:, 0:1, :, :]], axis=1)

    # Return value is in channel order [n, c, h, w] and RGB
    return img_rgb
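# A minimal NumPy/cv2 reference sketch of the same preprocessing (an illustration, not part of the
# original graph). The default mean/std values below are placeholders standing in for MEANS/STD.
import numpy as np
import cv2

def reference_preprocess(imgs_bgr_nhwc, input_size,
                         mean=(103.94, 116.78, 123.68), std=(57.38, 57.12, 58.40)):
    """Resize, normalize per BGR channel, then flip BGR -> RGB and return NCHW float32."""
    out = []
    for img in imgs_bgr_nhwc:
        img = cv2.resize(img.astype(np.float32), (input_size, input_size))
        img = (img - np.array(mean, np.float32)) / np.array(std, np.float32)
        out.append(img[:, :, ::-1].transpose(2, 0, 1))   # HWC BGR -> CHW RGB
    return np.stack(out)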
def get_target_tensor(self, dis_output, t_real):
    """Return the target vector for the binary cross entropy loss computation.

    Args:
        dis_output (tensor): Discriminator outputs.
        t_real (bool): If ``True``, uses the real label as target, otherwise
            uses the fake label as target.

    Returns:
        target (tensor): Target tensor vector.
    """
    if t_real:
        if self.real_label_tensor is None:
            self.real_label_tensor = dg.to_variable(
                np.ones(dis_output.shape, dtype="float32") * self.real_label)
        return L.expand_as(self.real_label_tensor, dis_output)
    else:
        if self.fake_label_tensor is None:
            self.fake_label_tensor = dg.to_variable(
                np.ones(dis_output.shape, dtype="float32") * self.fake_label)
        return L.expand_as(self.fake_label_tensor, dis_output)
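# A hedged standalone dygraph sketch (assumed shapes and names, not the original class) of how such a
# broadcast label tensor is typically consumed as the BCE-with-logits target for a discriminator map.
import numpy as np
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as L

with dg.guard():
    dis_output = dg.to_variable(np.random.randn(4, 1, 30, 30).astype("float32"))    # fake discriminator output
    real_label_tensor = dg.to_variable(np.ones(dis_output.shape, dtype="float32"))  # real label = 1.0
    target = L.expand_as(real_label_tensor, dis_output)   # same shape as dis_output
    loss = L.reduce_mean(L.sigmoid_cross_entropy_with_logits(dis_output, target))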
def index_sample(x, index):
    """Select input values according to index.

    Args:
        input: input matrix
        index: index matrix
    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(index.shape[0], index.shape[1], 1))
    # generate an arange index with the same leading shape as `index`
    batch_size = layers.cast(layers.shape(index)[0], dtype=index.dtype)
    zero = layers.fill_constant(shape=[1], dtype=index.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=index.dtype, value=1)
    arr_index = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=index.dtype), [1, 2])
    arr_index = layers.expand_as(arr_index, index)
    # generate the new (batch, position) index
    new_index = layers.concat([arr_index, index], -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # gather the output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (-1, x_s[-1] * 2))
    return out
def forward(self, input, target, mask):
    """Masked L1 loss computation.

    Args:
        input (tensor): Input tensor.
        target (tensor): Target tensor.
        mask (tensor): Mask to be applied to the output loss.

    Returns:
        (tensor): Loss value.
    """
    mask = L.expand_as(mask, input)
    loss = self.criterion(input * mask, target * mask)
    if self.normalize_over_valid:
        # The loss has been averaged over all pixels.
        # Only average over regions which are valid.
        loss = loss * np.prod(mask.shape) / (L.reduce_sum(mask) + 1e-6)
    return loss
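# A small NumPy sketch (illustration, not the original module) of the normalize_over_valid rescaling:
# an L1 loss averaged over all elements is multiplied by total_elements / valid_elements, which turns
# it into an average over the masked (valid) region only.
import numpy as np

def masked_l1_reference(pred, target, mask, normalize_over_valid=True):
    mask = np.broadcast_to(mask, pred.shape).astype(pred.dtype)
    loss = np.abs(pred * mask - target * mask).mean()     # averaged over *all* elements
    if normalize_over_valid:
        loss = loss * pred.size / (mask.sum() + 1e-6)     # re-average over valid elements only
    return loss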
def index_sample(x, index):
    """Select input values according to index.

    Args:
        input: input matrix
        index: index matrix
    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(len(r_x), -1, 1))
    # generate an arange index with the same leading shape as `index`
    arr_index = layers.arange(start=0, end=len(index), dtype=index.dtype)
    arr_index = layers.unsqueeze(arr_index, axes=[1, 2])
    arr_index = layers.expand_as(arr_index, index)
    # generate the new (batch, position) index
    new_index = layers.concat((arr_index, index), -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # gather the output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (*x_s[:dim], -1))
    return out
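# A hedged NumPy equivalent of the gather above (illustration only): for a 2-D x and a 2-D index it
# matches np.take_along_axis along the last axis, reproducing the docstring example.
import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]])
index = np.array([[1, 2],
                  [0, 1]])
print(np.take_along_axis(x, index, axis=-1))   # [[2 3] [4 5]]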
def forward(self, x):
    b, c, h, w = x.shape
    f_query = reshape(x, (b, -1, h * w))
    f_key = reshape(x, (b, -1, h * w))
    f_key = transpose(f_key, (0, 2, 1))
    f_value = reshape(x, (b, -1, h * w))
    f_similarity = bmm(f_query, f_key)   # [b, c, c]
    f_similarity_max = reduce_max(f_similarity, -1, keep_dim=True)
    f_similarity_max_reshape = expand_as(f_similarity_max, f_similarity)
    # subtract each similarity from its row max before the softmax
    f_similarity = f_similarity_max_reshape - f_similarity
    f_similarity = softmax(f_similarity)
    f_similarity = transpose(f_similarity, (0, 2, 1))
    f_attention = bmm(f_similarity, f_value)   # [b, c, h*w]
    f_attention = reshape(f_attention, (b, c, h, w))
    out = self.gamma * f_attention + x
    return out
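# A tiny NumPy sketch (illustration only) of the max-minus-similarity step above: because softmax is
# shift-invariant per row, softmax(row_max - s) equals softmax(-s), so the subtraction negates the
# similarities (less similar entries get larger attention weights) while keeping the exponents bounded.
import numpy as np

def np_softmax(z, axis=-1):
    e = np.exp(z - z.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

sim = np.random.rand(4, 4)
lhs = np_softmax(sim.max(axis=-1, keepdims=True) - sim)   # what the forward pass computes
rhs = np_softmax(-sim)                                    # plain softmax of the negated similarities
print(np.allclose(lhs, rhs))                              # True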
def ohem_conf_loss(self, pred_allboxes_conf, batch_size, labels_neg_mask, labels_pos_mask,
                   labels_pos_index, class_vectors, labels_pos_cid):
    batch_conf = P.reshape(pred_allboxes_conf, (-1, self.num_classes))
    loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0]
    loss_c = P.reshape(loss_c, (batch_size, -1))                     # (batch_size, 19248)
    labels_neg_mask = P.concat(labels_neg_mask, axis=0)              # (batch_size*19248, 1)
    labels_neg_mask = P.reshape(labels_neg_mask, (batch_size, -1))   # (batch_size, 19248)
    loss_c = labels_neg_mask * loss_c   # keep only the negative-sample losses, (batch_size, 19248)
    sorted_loss_c, loss_idx = P.argsort(loss_c, axis=-1, descending=True)

    labels_pos_mask = P.concat(labels_pos_mask, axis=0)              # (batch_size*19248, 1)
    labels_pos_mask = P.reshape(labels_pos_mask, (batch_size, -1))   # (batch_size, 19248)
    num_pos = P.cast(P.reduce_sum(labels_pos_mask, dim=1), 'int32')  # (batch_size, )
    num_neg = self.negpos_ratio * num_pos                            # (batch_size, )
    neg_topk_mask = []
    for idx in range(batch_size):
        desc = P.range(num_neg[idx], num_neg[idx] - P.shape(labels_pos_mask)[1], -1, 'int32')
        neg_topk_mask.append(desc)
    neg_topk_mask = P.concat(neg_topk_mask, axis=0)                  # (batch_size*19248, )
    neg_topk_mask = P.reshape(neg_topk_mask, (batch_size, -1))       # (batch_size, 19248)
    neg_topk_mask = P.cast(neg_topk_mask > 0, 'float32')             # (batch_size, 19248)
    sorted_loss_c = neg_topk_mask * sorted_loss_c

    selected_poss = []
    selected_negs = []
    selected_pos_class_vectors = []
    selected_neg_class_vectors = []
    for idx in range(batch_size):
        selected_neg_idx_idx = P.where(sorted_loss_c[idx] > 0)
        selected_neg_idx_idx.stop_gradient = True
        selected_neg_idx = P.gather(loss_idx[idx], selected_neg_idx_idx)
        selected_neg_idx.stop_gradient = True
        selected_neg = P.gather(pred_allboxes_conf[idx], selected_neg_idx)
        selected_neg.stop_gradient = True
        selected_negs.append(selected_neg)
        selected_pos = P.gather(pred_allboxes_conf[idx], labels_pos_index[idx])
        selected_pos.stop_gradient = True
        selected_poss.append(selected_pos)

        zeros = P.fill_constant(shape=[P.shape(selected_neg)[0], ], value=0, dtype='int32')
        zeros.stop_gradient = True
        selected_neg_class_vector = P.gather(class_vectors, zeros)
        selected_neg_class_vector.stop_gradient = True
        selected_neg_class_vectors.append(selected_neg_class_vector)

        labels_pos_cid.stop_gradient = True
        labels_pos_index[idx].stop_gradient = True
        selected_pos_cid = P.gather(labels_pos_cid[idx], labels_pos_index[idx])
        selected_pos_cid.stop_gradient = True
        selected_pos_class_vector = P.gather(class_vectors, selected_pos_cid)
        selected_pos_class_vector.stop_gradient = True
        selected_pos_class_vectors.append(selected_pos_class_vector)

    selected_negs = P.concat(selected_negs, axis=0)            # (?, 1+80)
    selected_poss = P.concat(selected_poss, axis=0)            # (?, 1+80)
    pred_ = P.concat([selected_negs, selected_poss], axis=0)   # (?, 1+80)
    selected_neg_class_vectors = P.concat(selected_neg_class_vectors, axis=0)   # (?, 1+80)
    selected_pos_class_vectors = P.concat(selected_pos_class_vectors, axis=0)   # (?, 1+80)
    labels_ = P.concat([selected_neg_class_vectors, selected_pos_class_vectors], axis=0)   # (?, 1+80)

    # softmax cross entropy
    fenzi = P.exp(pred_)                                # numerator
    fenmu = P.reduce_sum(fenzi, dim=1, keep_dim=True)   # denominator
    pred_prob = fenzi / P.expand_as(fenmu, target_tensor=fenzi)
    conf_loss = labels_ * (0 - P.log(pred_prob + 1e-9))   # cross entropy; a tiny constant avoids NaN
    conf_loss = P.reduce_sum(conf_loss)
    return conf_loss
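# A NumPy sanity sketch (illustration, assumed shapes) of the manual softmax cross entropy at the end
# of ohem_conf_loss: exp / row-sum followed by -sum(label * log(prob)) matches the usual log-softmax
# formulation, up to the 1e-9 stabiliser.
import numpy as np

logits = np.random.randn(5, 81).astype(np.float32)                        # (num_selected, 1+80)
labels = np.eye(81, dtype=np.float32)[np.random.randint(0, 81, size=5)]   # one-hot class vectors

prob = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
manual = -(labels * np.log(prob + 1e-9)).sum()
log_softmax = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
standard = -(labels * log_softmax).sum()
print(np.allclose(manual, standard, atol=1e-4))   # True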
def ghm_c_loss(self, pred_allboxes_conf, labels_pos_mask, labels_neg_mask, class_vectors, labels_pos_cid2):
    labels_pos_cid2 = P.reshape(labels_pos_cid2, (-1, ))   # [batch_size*num_priors]
    pred_allboxes_conf_r = P.reshape(
        pred_allboxes_conf, (-1, P.shape(pred_allboxes_conf)[2]))   # [batch_size*num_priors, num_classes]
    label_prob = P.gather(class_vectors, labels_pos_cid2)   # one-hot mask (batch_size*num_priors, num_classes)

    # We can switch to a sigmoid activation at training time while keeping softmax at inference time.
    # This works because if a logit has the largest sigmoid value, it must also have the largest softmax value.
    pred_prob = P.sigmoid(pred_allboxes_conf_r)
    pred_prob = P.cast(pred_prob, 'float32')

    # Binary cross entropy loss. prob_neg_loss actually contains the losses of ignored samples, which
    # should not be counted; they are filtered out by the masks below.
    # The number of samples becomes batch_size*num_priors*num_classes instead of batch_size*num_priors.
    # For a prior box (one of batch_size*num_priors) whose ground-truth class is 7, channel 7 is the
    # positive sample and the remaining 80 channels of this box are negative samples
    # (negative samples here are not the background, but the 80 probabilities of the channels other
    # than the ground-truth class_id channel).

    # Gradient norm g: 1-p for positive samples, p for negative samples.
    pred_prob_copy = P.assign(pred_prob)
    g = (1 - pred_prob_copy) * label_prob + pred_prob_copy * (1 - label_prob)
    labels_pos_mask2 = P.reshape(labels_pos_mask, (-1, ))    # [batch_size*num_priors]
    labels_neg_mask2 = P.reshape(labels_neg_mask, (-1, ))    # [batch_size*num_priors]
    labels_pos_mask3 = P.reshape(labels_pos_mask, (-1, 1))   # [batch_size*num_priors, 1]
    labels_neg_mask3 = P.reshape(labels_neg_mask, (-1, 1))   # [batch_size*num_priors, 1]
    labels_pos_mask4 = P.expand_as(labels_pos_mask3, g)      # [batch_size*num_priors, num_classes]
    labels_neg_mask4 = P.expand_as(labels_neg_mask3, g)      # [batch_size*num_priors, num_classes]
    # set g of ignored samples (cid=-1) to -1.0
    g = g * (labels_pos_mask4 + labels_neg_mask4) + (-1.0) * (1 - labels_pos_mask4 - labels_neg_mask4)
    g.stop_gradient = True
    pred_prob.stop_gradient = False

    # divide the value range [0, 1] of g into k bins
    k = 5
    epsilon = 1.0 / k   # bin width
    w = 0
    c = P.cast(-0.5 <= g, 'float32') * P.cast(g < epsilon, 'float32')
    w += c * P.reduce_sum(c)
    for i in range(1, k - 1, 1):
        c = P.cast(epsilon * i <= g, 'float32') * P.cast(g < epsilon * (i + 1), 'float32')
        w += c * P.reduce_sum(c)
    c = P.cast(epsilon * (k - 1) <= g, 'float32')
    w += c * P.reduce_sum(c)
    # gradient density
    GD = w * k

    # GHM_C_loss
    prob_pos_loss = label_prob * (0 - P.log(pred_prob + 1e-9)) / (GD + 1e-9)             # tiny constant avoids NaN
    prob_neg_loss = (1 - label_prob) * (0 - P.log(1 - pred_prob + 1e-9)) / (GD + 1e-9)   # tiny constant avoids NaN
    ghm_c_loss = prob_pos_loss + prob_neg_loss
    ghm_c_loss = P.reduce_sum(ghm_c_loss, dim=1)
    ghm_c_loss = ghm_c_loss * (labels_pos_mask2 + labels_neg_mask2)
    ghm_c_loss = P.reduce_sum(ghm_c_loss)
    return ghm_c_loss
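# A small NumPy sketch (illustration, not the original op graph) of the gradient-density reweighting
# above: each sample's density is the number of samples in its gradient-norm bin times k, and dividing
# the loss by that density down-weights bins crowded with easy samples.
import numpy as np

g = np.random.rand(1000)                          # gradient norms of valid samples, in [0, 1]
k = 5
bins = np.minimum((g * k).astype(int), k - 1)     # which of the k bins each sample falls into
counts = np.bincount(bins, minlength=k)           # samples per bin
GD = counts[bins] * k                             # gradient density per sample (same role as w * k above)
weights = 1.0 / (GD + 1e-9)                       # crowded bins (easy samples) get smaller weights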