def grow_top_k(step_idx, alive_seq, alive_log_prob, parent_idx):
    pre_ids = alive_seq

    dec_step_emb = layers.embedding(
        input=pre_ids,
        size=[self.tar_vocab_size, self.hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='target_embedding',
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale)))

    dec_att_out, new_hidden_array, new_cell_array = decoder_step(
        dec_step_emb, pre_feed, pre_hidden_array, pre_cell_array, enc_memory)

    projection = layers.matmul(dec_att_out, softmax_weight)
    logits = layers.softmax(projection)
    current_log = layers.elementwise_add(
        x=layers.log(logits), y=alive_log_prob, axis=0)

    # GNMT length penalty: ((5 + step + 1) / 6) ** alpha
    base_1 = layers.cast(step_idx, 'float32') + 6.0
    base_1 /= 6.0
    length_penalty = layers.pow(base_1, alpha)
    len_pen = layers.pow(
        ((5. + layers.cast(step_idx + 1, 'float32')) / 6.), alpha)

    current_log = layers.reshape(current_log, shape=[1, -1])
    current_log = current_log / length_penalty
    topk_scores, topk_indices = layers.topk(input=current_log, k=beam_size)
    topk_scores = layers.reshape(topk_scores, shape=[-1])
    topk_log_probs = topk_scores * length_penalty

    generate_id = layers.reshape(topk_indices, shape=[-1]) % self.tar_vocab_size
    selected_beam = layers.reshape(topk_indices, shape=[-1]) // self.tar_vocab_size

    topk_finished = layers.equal(generate_id, eos_ids)
    topk_finished = layers.cast(topk_finished, 'float32')

    generate_id = layers.reshape(generate_id, shape=[-1, 1])
    pre_tokens_list = layers.gather(tokens, selected_beam)
    full_tokens_list = layers.concat([pre_tokens_list, generate_id], axis=1)

    return full_tokens_list, topk_log_probs, topk_scores, topk_finished, \
        selected_beam, generate_id, dec_att_out, new_hidden_array, new_cell_array

def bbox_ciou(self, boxes1_x0y0x1y1, boxes2_x0y0x1y1):
    '''
    Compute ciou = iou - p2 / c2 - a * v
    :param boxes1: (batch_size, num_priors, 4)   pred_x0y0x1y1
    :param boxes2: (batch_size, num_priors, 4)   label_x0y0x1y1
    :return:
    '''
    # Center coordinates and width/height
    boxes1 = P.concat([(boxes1_x0y0x1y1[:, :, :2] + boxes1_x0y0x1y1[:, :, 2:]) * 0.5,
                       boxes1_x0y0x1y1[:, :, 2:] - boxes1_x0y0x1y1[:, :, :2]], axis=-1)
    boxes2 = P.concat([(boxes2_x0y0x1y1[:, :, :2] + boxes2_x0y0x1y1[:, :, 2:]) * 0.5,
                       boxes2_x0y0x1y1[:, :, 2:] - boxes2_x0y0x1y1[:, :, :2]], axis=-1)

    # Areas of the two boxes
    boxes1_area = (boxes1_x0y0x1y1[:, :, 2] - boxes1_x0y0x1y1[:, :, 0]) * (
        boxes1_x0y0x1y1[:, :, 3] - boxes1_x0y0x1y1[:, :, 1])
    boxes2_area = (boxes2_x0y0x1y1[:, :, 2] - boxes2_x0y0x1y1[:, :, 0]) * (
        boxes2_x0y0x1y1[:, :, 3] - boxes2_x0y0x1y1[:, :, 1])

    # Top-left and bottom-right corners of the intersection box
    left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :2], boxes2_x0y0x1y1[:, :, :2])
    right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, 2:], boxes2_x0y0x1y1[:, :, 2:])

    # Intersection area and IoU
    inter_section = P.relu(right_down - left_up)
    inter_area = inter_section[:, :, 0] * inter_section[:, :, 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / union_area

    # Top-left and bottom-right corners of the enclosing box
    enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :2], boxes2_x0y0x1y1[:, :, :2])
    enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, 2:], boxes2_x0y0x1y1[:, :, 2:])

    # Squared diagonal of the enclosing box
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = P.pow(enclose_wh[:, :, 0], 2) + P.pow(enclose_wh[:, :, 1], 2)

    # Squared distance between the two box centers
    p2 = P.pow(boxes1[:, :, 0] - boxes2[:, :, 0], 2) + P.pow(
        boxes1[:, :, 1] - boxes2[:, :, 1], 2)

    # Add the a*v term. boxes2[:, :, 3] may be 0, so add a tiny constant to avoid nan.
    atan1 = P.atan(boxes1[:, :, 2] / (boxes1[:, :, 3] + 1e-9))
    atan2 = P.atan(boxes2[:, :, 2] / (boxes2[:, :, 3] + 1e-9))
    v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi ** 2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou

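# A minimal NumPy check of the CIoU formula used above on a single pair of
# corner-format boxes; ciou_np and its inputs are illustrative names only,
# not part of the original code.
import math
import numpy as np

def ciou_np(box_a, box_b, eps=1e-9):
    # box format: [x0, y0, x1, y1]
    iw = max(0.0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
    ih = max(0.0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
    inter = iw * ih
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    iou = inter / (area_a + area_b - inter + eps)
    # squared diagonal of the enclosing box (c2) and squared center distance (p2)
    cw = max(box_a[2], box_b[2]) - min(box_a[0], box_b[0])
    ch = max(box_a[3], box_b[3]) - min(box_a[1], box_b[1])
    c2 = cw ** 2 + ch ** 2
    cxa, cya = (box_a[0] + box_a[2]) / 2, (box_a[1] + box_a[3]) / 2
    cxb, cyb = (box_b[0] + box_b[2]) / 2, (box_b[1] + box_b[3]) / 2
    p2 = (cxa - cxb) ** 2 + (cya - cyb) ** 2
    # aspect-ratio consistency term a * v
    wa, ha = box_a[2] - box_a[0], box_a[3] - box_a[1]
    wb, hb = box_b[2] - box_b[0], box_b[3] - box_b[1]
    v = 4.0 / math.pi ** 2 * (math.atan(wa / (ha + eps)) - math.atan(wb / (hb + eps))) ** 2
    a = v / (1.0 - iou + v)
    return iou - p2 / c2 - a * v

print(ciou_np(np.array([0., 0., 2., 2.]), np.array([1., 1., 3., 3.])))
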
def norm(param, dim, power):
    powered = F.pow(param, power)
    in_dtype = powered.dtype
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        powered = F.cast(powered, "float32")
    powered_norm = F.reduce_sum(powered, dim=dim, keep_dim=False)
    norm_ = F.pow(powered_norm, 1. / power)
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        norm_ = F.cast(norm_, "float16")
    return norm_

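# A small NumPy sketch of the same p-norm reduction (sum of x**p along an axis,
# then the 1/p root), assuming non-negative inputs as written above;
# numpy.linalg.norm is used only as a cross-check.
import numpy as np

x = np.abs(np.random.randn(4, 3)).astype(np.float32)
power, dim = 2.0, 1
manual = np.power(np.sum(np.power(x, power), axis=dim), 1.0 / power)
print(np.allclose(manual, np.linalg.norm(x, ord=power, axis=dim), atol=1e-5))
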
def _iou_hw(box_a, box_b, eps=1e-9):
    """Compute pairwise IoU and aspect-ratio information between two sets of boxes.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4].
        box_b: (tensor) bounding boxes, Shape: [B, 4].
    Return:
        (tensor) iou, Shape: [A, B].
    """
    A = box_a.shape[0]
    B = box_b.shape[0]

    box_a_rb = L.reshape(box_a[:, 2:], (A, 1, 2))
    box_a_rb = L.expand(box_a_rb, [1, B, 1])
    box_b_rb = L.reshape(box_b[:, 2:], (1, B, 2))
    box_b_rb = L.expand(box_b_rb, [A, 1, 1])
    max_xy = L.elementwise_min(box_a_rb, box_b_rb)

    box_a_lu = L.reshape(box_a[:, :2], (A, 1, 2))
    box_a_lu = L.expand(box_a_lu, [1, B, 1])
    box_b_lu = L.reshape(box_b[:, :2], (1, B, 2))
    box_b_lu = L.expand(box_b_lu, [A, 1, 1])
    min_xy = L.elementwise_max(box_a_lu, box_b_lu)

    inter = L.relu(max_xy - min_xy)
    inter = inter[:, :, 0] * inter[:, :, 1]

    box_a_w = box_a[:, 2] - box_a[:, 0]
    box_a_h = box_a[:, 3] - box_a[:, 1]
    area_a = box_a_h * box_a_w
    area_a = L.reshape(area_a, (A, 1))
    area_a = L.expand(area_a, [1, B])  # [A, B]

    box_b_w = box_b[:, 2] - box_b[:, 0]
    box_b_h = box_b[:, 3] - box_b[:, 1]
    area_b = box_b_h * box_b_w
    area_b = L.reshape(area_b, (1, B))
    area_b = L.expand(area_b, [A, 1])  # [A, B]

    union = area_a + area_b - inter
    iou = inter / union  # [A, B]  iou lies in [0, 1]; the larger it is, the more the box should be suppressed

    # Aspect-ratio information
    atan1 = L.atan(box_a_h / (box_a_w + eps))
    atan2 = L.atan(box_b_h / (box_b_w + eps))
    atan1 = L.reshape(atan1, (A, 1))
    atan1 = L.expand(atan1, [1, B])  # [A, B]
    atan2 = L.reshape(atan2, (1, B))
    atan2 = L.expand(atan2, [A, 1])  # [A, B]
    v = 4.0 * L.pow(atan1 - atan2, 2) / (math.pi ** 2)  # [A, B]  v lies in [0, 1]; the smaller it is, the more the box should be suppressed

    factor = 0.4
    overlap = L.pow(iou, (1 - factor)) * L.pow(1.0 - v, factor)
    return overlap

def focal_loss(pred, label, alpha=0.25, gamma=2, epsilon=1e-6):
    '''
    Increasing alpha penalizes the foreground class more, i.e. weights it more heavily.
    Increasing gamma down-weights confident (easy) examples and focuses learning on hard ones.
    '''
    pred = clip(pred, epsilon, 1 - epsilon)
    label = clip(label, epsilon, 1 - epsilon)
    loss = -1 * (alpha * layers.pow((1 - pred), gamma) * label * layers.log(pred) +
                 (1 - alpha) * layers.pow(pred, gamma) * (1 - label) * layers.log(1 - pred))
    return loss

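# A minimal NumPy sketch of the same binary focal loss for a single prediction,
# just to make the alpha/gamma weighting explicit (names and values illustrative).
import numpy as np

def focal_loss_np(pred, label, alpha=0.25, gamma=2.0, epsilon=1e-6):
    pred = np.clip(pred, epsilon, 1 - epsilon)
    pos = alpha * (1 - pred) ** gamma * label * np.log(pred)
    neg = (1 - alpha) * pred ** gamma * (1 - label) * np.log(1 - pred)
    return -(pos + neg)

# An easy positive (p=0.9) is weighted down relative to a hard positive (p=0.1).
print(focal_loss_np(0.9, 1.0), focal_loss_np(0.1, 1.0))
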
def sigmoid_focal_loss(self, x, label, fg_num, gamma=2.0, alpha=0.25):
    C = x.shape[1]
    eye = paddle.eye(C + 1, dtype='float32')
    one_hot = L.gather(eye, label)
    pos_mask = one_hot[:, 1:]  # mask of positive samples

    p = L.sigmoid(x)  # [batch_size * num_grids, 80], predicted class probabilities
    pos_loss = pos_mask * (0 - L.log(p + 1e-9)) * L.pow(1 - p, gamma) * alpha
    neg_loss = (1.0 - pos_mask) * (0 - L.log(1 - p + 1e-9)) * L.pow(p, gamma) * (1 - alpha)
    focal_loss = pos_loss + neg_loss

    if fg_num > 0.5:  # when there are no gt (fg_num == 0), do not divide focal_loss by anything.
        focal_loss = focal_loss / fg_num
    return focal_loss

def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores,
                finished_in_finished):
    """
    is_finished
    """
    base_1 = layers.cast(source_length, 'float32') + 55.0
    base_1 /= 6.0
    max_length_penalty = layers.pow(base_1, self.alpha)

    flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
    lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
    lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty

    lowest_score_of_finished_in_finish = layers.reduce_min(
        finished_scores * finished_in_finished, dim=1)

    finished_in_finished = layers.cast(finished_in_finished, 'bool')
    lowest_score_of_finished_in_finish += \
        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)
    #print lowest_score_of_finished_in_finish

    bound_is_met = layers.reduce_all(
        layers.greater_than(lowest_score_of_finished_in_finish, lower_bound_alive_scores))

    decode_length = source_length + 50
    length_cond = layers.less_than(x=step_idx, y=decode_length)

    return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)

def is_finished(alive_log_prob, finished_scores, finished_in_finished):
    max_out_len = 200
    max_length_penalty = layers.pow(
        layers.fill_constant([1], dtype='float32', value=((5.0 + max_out_len) / 6.0)),
        alpha)

    lower_bound_alive_score = layers.slice(
        alive_log_prob, starts=[0], ends=[1], axes=[0]) / max_length_penalty

    lowest_score_of_finished_in_finished = finished_scores * finished_in_finished
    lowest_score_of_finished_in_finished += (1.0 - finished_in_finished) * -INF
    lowest_score_of_finished_in_finished = layers.reduce_min(
        lowest_score_of_finished_in_finished)

    met = layers.less_than(lower_bound_alive_score, lowest_score_of_finished_in_finished)
    met = layers.cast(met, 'float32')
    bound_is_met = layers.reduce_sum(met)

    finished_eos_num = layers.reduce_sum(finished_in_finished)

    finish_cond = layers.less_than(
        finished_eos_num, layers.fill_constant([1], dtype='float32', value=beam_size))

    return finish_cond

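# A small NumPy sketch of the early-stopping test above: an alive beam can only
# improve its length-normalized score up to log_prob / max_length_penalty, so once
# the worst finished score already beats that bound, decoding can stop
# (alpha and the scores below are illustrative).
import numpy as np

alpha, max_out_len = 0.6, 200
max_length_penalty = ((5.0 + max_out_len) / 6.0) ** alpha
best_alive_log_prob = -4.0                        # best alive hypothesis (raw log prob)
lower_bound_alive_score = best_alive_log_prob / max_length_penalty
finished_scores = np.array([-0.9, -1.3])          # already length-normalized
bound_is_met = finished_scores.min() > lower_bound_alive_score
print(lower_bound_alive_score, bound_is_met)
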
def _matrix_nms(bboxes, cate_labels, cate_scores, kernel='gaussian', sigma=2.0):
    """Matrix NMS for multi-class bboxes.

    Args:
        bboxes (Tensor): shape (n, 4)
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str): 'linear' or 'gaussian'
        sigma (float): std in gaussian method
    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []

    # n x n IoU matrix: pairwise IoU between the boxes
    iou_matrix = jaccard(bboxes, bboxes)  # shape: [n_samples, n_samples]
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)  # keep only the upper triangle

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])  # shape: [n_samples, n_samples]
    # Entry (i, j) tells whether prediction i and prediction j share the same class id;
    # only same-class predictions suppress each other.
    d = cate_labels_x - L.transpose(cate_labels_x, [1, 0])
    d = L.pow(d, 2)  # 0 where classes match, > 0 otherwise. Comparing with == 0 is unreliable (as in TF), so use < 1.
    label_matrix = paddle.triu(L.cast(d < 1, 'float32'), diagonal=1)  # shape: [n_samples, n_samples]

    # IoU compensation
    # Zero out cross-class IoU, keep same-class IoU, then take the column-wise max.
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, [0, ])  # shape: [n_samples, ]
    # After the expand/transpose below, row 0 of compensate_iou holds a0 (repeated
    # n_samples times): the highest IoU between object 0 and any higher-scoring object
    # of the same class; row 1 holds a1 for object 1, and so on. Reading down any column
    # gives, for object 0, 1, ..., n_samples-1, its highest IoU with a higher-scoring
    # same-class object.
    compensate_iou = L.transpose(
        L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1]), [1, 0])  # shape: [n_samples, n_samples]

    # IoU decay
    # Zero out cross-class IoU, keep same-class IoU.
    # decay_iou[i, j] is the IoU between prediction i and prediction j (0 for different
    # classes), upper triangle only.
    decay_iou = iou_matrix * label_matrix  # shape: [n_samples, n_samples]

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_sum(decay_matrix / compensate_matrix, [0, ])
    elif kernel == 'linear':
        # Look down column j (example from 1_test_matrixnms.py, column 2):
        # decay_iou column 2 is [0.9389, 0.9979, 0, 0]: object 2 has IoU 0.9389 and 0.9979
        # with the two higher-scoring same-class objects.
        # compensate_iou column 2 is [0, 0.9409, 0.9979, 0]: those two higher-scoring
        # objects in turn have max IoU 0 and 0.9409 with objects above them.
        # decay_matrix column 2 is [0.0610, 0.0348, 485.28, 1]; its minimum is 0.0348, so
        # object 1 is what suppresses object 2 (the last two values are always >= 1 and
        # can be ignored).
        # In short: the row index of the minimum in column j tells which object suppresses
        # object j, and a larger decay_iou leads to a smaller decay_matrix.
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, [0, ])
    else:
        raise NotImplementedError

    # update the scores
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update

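# A compact NumPy sketch of the Matrix NMS decay above (single class, linear kernel),
# assuming scores are already sorted in descending order; jaccard() is replaced by a
# tiny pairwise-IoU helper for illustration.
import numpy as np

def pairwise_iou(boxes):
    x0 = np.maximum(boxes[:, None, 0], boxes[None, :, 0])
    y0 = np.maximum(boxes[:, None, 1], boxes[None, :, 1])
    x1 = np.minimum(boxes[:, None, 2], boxes[None, :, 2])
    y1 = np.minimum(boxes[:, None, 3], boxes[None, :, 3])
    inter = np.clip(x1 - x0, 0, None) * np.clip(y1 - y0, 0, None)
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area[:, None] + area[None, :] - inter)

boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], dtype='float32')
scores = np.array([0.9, 0.8, 0.7], dtype='float32')
iou = np.triu(pairwise_iou(boxes), k=1)            # IoU with higher-scoring boxes only
compensate = iou.max(axis=0)                       # best IoU each suppressor itself faces
decay = ((1 - iou) / (1 - compensate[:, None] + 1e-9)).min(axis=0)
print(scores * decay)   # box 1 is decayed by box 0; box 2 keeps its score
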
def forward(self, mu, logvar=None):
    """
    Compute loss

    Args:
        mu (tensor): mean
        logvar (tensor): logarithm of variance
    """
    if logvar is None:
        logvar = L.zeros_like(mu)
    return -0.5 * L.reduce_sum(1 + logvar - L.pow(mu, 2) - L.exp(logvar))

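# A NumPy check of the closed-form KL divergence this loss implements,
# KL(N(mu, sigma^2) || N(0, 1)) summed over dimensions, assuming a standard
# normal prior (values below are illustrative).
import numpy as np

mu = np.array([0.5, -1.0], dtype=np.float32)
logvar = np.array([0.0, 0.2], dtype=np.float32)
kl = -0.5 * np.sum(1 + logvar - mu ** 2 - np.exp(logvar))
print(kl)
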
def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
    b, _, h, w = tenFlow.shape
    tenDifference = tenFirst - backwarp(tenInput=tenSecond, tenFlow=tenFlow * self.fltBackward)
    tenDifference = L.pow(tenDifference, 2)
    tenDifference = L.reduce_sum(tenDifference, 1, True)  # [b, 1, h, w]
    tenDifference = L.sqrt(tenDifference).detach()

    tenFeaturesFirst = self.moduleFeat(tenFeaturesFirst)

    tenMean = L.reshape(tenFlow, (b, 2, -1))    # [b, 2, h * w]
    tenMean = L.reduce_mean(tenMean, 2, True)   # [b, 2, 1]
    tenMean = L.reshape(tenMean, (b, 2, 1, 1))  # [b, 2, 1, 1]
    tenMean = L.expand(tenMean, (1, 1, h, w))   # [b, 2, h, w]
    delta = tenFlow - tenMean

    diff = L.concat([tenDifference, delta, tenFeaturesFirst], 1)
    tenDist = self.moduleDist(self.moduleMain(diff))
    tenDist = L.pow(tenDist, 2.0) * -1.0
    tenDist = tenDist - L.reduce_max(tenDist, 1, True)
    tenDist = L.exp(tenDist)

    tenDivisor = L.reduce_sum(tenDist, 1, True)
    tenDivisor = L.reciprocal(tenDivisor)

    tenScaleX = L.unfold(x=tenFlow[:, 0:1, :, :], kernel_sizes=self.intUnfold,
                         strides=1, paddings=int((self.intUnfold - 1) / 2))  # [b, c, h * w]
    tenScaleX = L.reshape(tenScaleX, (b, -1, h, w))  # [b, c, h, w]
    tenScaleX = self.moduleScaleX(tenDist * tenScaleX) * tenDivisor

    tenScaleY = L.unfold(x=tenFlow[:, 1:2, :, :], kernel_sizes=self.intUnfold,
                         strides=1, paddings=int((self.intUnfold - 1) / 2))  # [b, c, h * w]
    tenScaleY = L.reshape(tenScaleY, (b, -1, h, w))  # [b, c, h, w]
    tenScaleY = self.moduleScaleY(tenDist * tenScaleY) * tenDivisor

    return L.concat([tenScaleX, tenScaleY], 1)

def _postprocess_output(ioup, output, an_num, num_classes, iou_aware_factor):
    """
    post process output objectness score
    """
    tensors = []
    stride = output.shape[1] // an_num
    for m in range(an_num):
        tensors.append(output[:, stride * m:stride * m + 4, :, :])
        obj = output[:, stride * m + 4:stride * m + 5, :, :]
        obj = L.sigmoid(obj)
        ip = ioup[:, m:m + 1, :, :]
        new_obj = L.pow(obj, (1 - iou_aware_factor)) * L.pow(ip, iou_aware_factor)
        new_obj = _de_sigmoid(new_obj)  # keep the confidence slot un-activated (no sigmoid() applied)
        tensors.append(new_obj)
        tensors.append(output[:, stride * m + 5:stride * m + 5 + num_classes, :, :])

    output = L.concat(tensors, axis=1)
    return output

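# A NumPy sketch of the IoU-aware score fusion above: the final confidence is
# obj ** (1 - factor) * iou ** factor, mapped back to logit space so the rest of
# the pipeline can apply sigmoid later. _de_sigmoid is assumed here to be the
# inverse sigmoid (logit); names and values are illustrative.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def de_sigmoid(p, eps=1e-7):
    p = np.clip(p, eps, 1 - eps)
    return -np.log(1.0 / p - 1.0)

factor = 0.4
obj_logit, iou_pred = 1.2, 0.85          # raw objectness logit, predicted IoU
new_obj = sigmoid(obj_logit) ** (1 - factor) * iou_pred ** factor
print(new_obj, de_sigmoid(new_obj))      # fused score and its logit
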
def grow_topk(self, i, logits, alive_seq, alive_log_probs, cache, enc_output, enc_bias):
    """
    grow_topk
    """
    logits = layers.reshape(logits, [self.batch_size, self.beam_size, -1])

    candidate_log_probs = layers.log(layers.softmax(logits, axis=2))
    log_probs = candidate_log_probs + layers.unsqueeze(alive_log_probs, axes=[2])

    base_1 = layers.cast(i, 'float32') + 6.0
    base_1 /= 6.0
    length_penalty = layers.pow(base_1, self.alpha)
    #length_penalty = layers.pow(((5.0 + layers.cast(i+1, 'float32')) / 6.0), self.alpha)

    curr_scores = log_probs / length_penalty
    flat_curr_scores = layers.reshape(curr_scores, [self.batch_size, self.beam_size * self.vocab_size])

    topk_scores, topk_ids = layers.topk(flat_curr_scores, k=self.beam_size * 2)

    topk_log_probs = topk_scores * length_penalty

    select_beam_index = topk_ids // self.vocab_size
    select_id = topk_ids % self.vocab_size
    #layers.Print(select_id, message="select_id", summarize=1024)
    #layers.Print(topk_scores, message="topk_scores", summarize=10000000)

    flat_select_beam_index = layers.reshape(select_beam_index, [-1]) + self.gather_top2k_append_index

    topk_seq = layers.gather(alive_seq, [flat_select_beam_index])
    topk_seq = layers.reshape(topk_seq, [self.batch_size, 2 * self.beam_size, -1])

    #concat with current ids
    topk_seq = layers.concat([topk_seq, layers.unsqueeze(select_id, axes=[2])], axis=2)
    topk_finished = layers.cast(layers.equal(select_id, self.eos_id), 'float32')

    #gather cache
    self.gather_cache(cache, flat_select_beam_index)

    #topk_seq: [batch_size, 2*beam_size, i+1]
    #topk_log_probs, topk_scores, topk_finished: [batch_size, 2*beam_size]
    return topk_seq, topk_log_probs, topk_scores, topk_finished, cache

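# A NumPy sketch of how the flattened top-k indices above split back into a beam
# index (// vocab_size) and a token id (% vocab_size); vocab_size and the random
# scores are illustrative.
import numpy as np

beam_size, vocab_size = 2, 5
flat_scores = np.random.rand(beam_size * vocab_size)   # [beam_size * vocab_size]
topk_ids = np.argsort(-flat_scores)[:beam_size * 2]    # top 2*beam_size candidates
select_beam_index = topk_ids // vocab_size             # which beam each candidate extends
select_id = topk_ids % vocab_size                      # which token it appends
print(select_beam_index, select_id)
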
def bbox_ciou(boxes1, boxes2):
    '''
    Compute ciou = iou - p2 / c2 - a * v
    :param boxes1: (8, 13, 13, 3, 4)   pred_xywh
    :param boxes2: (8, 13, 13, 3, 4)   label_xywh
    :return:
    '''
    # Convert to top-left and bottom-right corners
    boxes1_x0y0x1y1 = P.concat([boxes1[:, :, :, :, :2] - boxes1[:, :, :, :, 2:] * 0.5,
                                boxes1[:, :, :, :, :2] + boxes1[:, :, :, :, 2:] * 0.5], axis=-1)
    boxes2_x0y0x1y1 = P.concat([boxes2[:, :, :, :, :2] - boxes2[:, :, :, :, 2:] * 0.5,
                                boxes2[:, :, :, :, :2] + boxes2[:, :, :, :, 2:] * 0.5], axis=-1)

    '''
    Compare boxes1_x0y0x1y1[..., :2] and boxes1_x0y0x1y1[..., 2:] element-wise, i.e. compare
    [x0, y0] against [x1, y1] and keep the smaller one, e.g. keep [x0, y0].
    This guards against negative w/h early in training, which would make x0y0 the
    bottom-right corner and x1y1 the top-left corner.
    '''
    boxes1_x0y0x1y1 = P.concat([
        P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, :2], boxes1_x0y0x1y1[:, :, :, :, 2:]),
        P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, :2], boxes1_x0y0x1y1[:, :, :, :, 2:])
    ], axis=-1)
    boxes2_x0y0x1y1 = P.concat([
        P.elementwise_min(boxes2_x0y0x1y1[:, :, :, :, :2], boxes2_x0y0x1y1[:, :, :, :, 2:]),
        P.elementwise_max(boxes2_x0y0x1y1[:, :, :, :, :2], boxes2_x0y0x1y1[:, :, :, :, 2:])
    ], axis=-1)

    # Areas of the two boxes
    boxes1_area = (boxes1_x0y0x1y1[:, :, :, :, 2] - boxes1_x0y0x1y1[:, :, :, :, 0]) * (
        boxes1_x0y0x1y1[:, :, :, :, 3] - boxes1_x0y0x1y1[:, :, :, :, 1])
    boxes2_area = (boxes2_x0y0x1y1[:, :, :, :, 2] - boxes2_x0y0x1y1[:, :, :, :, 0]) * (
        boxes2_x0y0x1y1[:, :, :, :, 3] - boxes2_x0y0x1y1[:, :, :, :, 1])

    # Top-left and bottom-right corners of the intersection box, both of shape (8, 13, 13, 3, 2)
    left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, :2], boxes2_x0y0x1y1[:, :, :, :, :2])
    right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, 2:], boxes2_x0y0x1y1[:, :, :, :, 2:])

    # Intersection area and IoU
    inter_section = P.relu(right_down - left_up)
    inter_area = inter_section[:, :, :, :, 0] * inter_section[:, :, :, :, 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / (union_area + 1e-9)

    # Top-left and bottom-right corners of the enclosing box, both of shape (8, 13, 13, 3, 2)
    enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, :2], boxes2_x0y0x1y1[:, :, :, :, :2])
    enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, 2:], boxes2_x0y0x1y1[:, :, :, :, 2:])

    # Squared diagonal of the enclosing box
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = P.pow(enclose_wh[:, :, :, :, 0], 2) + P.pow(enclose_wh[:, :, :, :, 1], 2)

    # Squared distance between the two box centers
    p2 = P.pow(boxes1[:, :, :, :, 0] - boxes2[:, :, :, :, 0], 2) + P.pow(
        boxes1[:, :, :, :, 1] - boxes2[:, :, :, :, 1], 2)

    # Add the a*v term.
    atan1 = P.atan(boxes1[:, :, :, :, 2] / (boxes1[:, :, :, :, 3] + 1e-9))
    atan2 = P.atan(boxes2[:, :, :, :, 2] / (boxes2[:, :, :, :, 3] + 1e-9))
    v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi ** 2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou

    print('------------------ step %d ------------------' % step)

    # ==================== train ====================
    batch_data = np.random.normal(loc=0, scale=1, size=(2, 3, 28, 28)).astype(np.float32)
    y_true_arr = np.random.normal(loc=0, scale=1, size=(2, 8, 28, 28)).astype(np.float32)
    batch_data2 = paddle.to_tensor(batch_data, place=place)
    y_true_arr2 = paddle.to_tensor(y_true_arr, place=place)

    paddle_conv01_out, paddle_bn01_out, paddle_act01_out, paddle_conv02_out = model(batch_data2)

    # Build the loss function.
    # First square the element-wise differences; you can use the P.pow() op or Python's ** operator.
    mseloss = P.pow(y_true_arr2 - paddle_conv02_out, 2)
    mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss
    paddle_mseloss_out = mseloss.numpy()
    paddle_bn01_out = paddle_bn01_out.numpy()
    paddle_conv02_out = paddle_conv02_out.numpy()

    # Update the weights
    mseloss.backward()
    if step % 1 == 0:
        optimizer.step()
        optimizer.clear_grad()

    print('train_forward:')
    # Simulate the training step in plain Python and check it against Paddle's outputs;
    # we expect the same results as PaddlePaddle.
    my_conv01_out = conv01.train_forward(batch_data)

def inference(self, model, inputs, outputs):
    """
    Run inference.

    Args:
        inputs(dict): Its key is input name(str) and its value is a Variable.
        model(object): A generate model. Need to implement `_generation_network`
            and `_calc_logits`.

    Returns:
        dict(str:Variable): Its key is output name(str) and its value is a Variable.
    """
    # prepare while loop
    max_len = layers.fill_constant(
        shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True)
    min_len = layers.fill_constant(
        shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True)
    step_idx = layers.fill_constant(
        shape=[1], dtype="int64", value=0, force_cpu=True)

    ids = layers.array_write(layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx)
    pos_biases = layers.array_write(layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx)
    scores = layers.array_write(inputs["init_score"], step_idx)
    tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], step_idx)
    parent_idx = inputs["parent_idx"]

    if self.decoding_strategy == "beam_search":
        beam_size = self.beam_size
    else:
        beam_size = 1

    eos_penalty = np.zeros(self.vocab_size, dtype="float32")
    eos_penalty[self.eos_id] = -1e9
    eos_penalty = layers.assign(eos_penalty)

    token_penalty = np.zeros(self.vocab_size, dtype="float32")
    token_penalty[self.unk_id] = -1e9
    if self.mask_id >= 0:
        token_penalty[self.mask_id] = -1e9
    token_penalty = layers.assign(token_penalty)

    # start while loop
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        pos_bias = layers.array_read(array=pos_biases, i=step_idx)
        pos_bias = layers.gather(input=pos_bias, index=parent_idx)

        tmp_tgt_generation_mask = layers.array_read(tgt_generation_mask, i=step_idx)
        dtype = tmp_tgt_generation_mask.dtype

        append_mask = layers.fill_constant_batch_size_like(
            input=pre_ids, value=1.0, shape=[-1, 1, 1], dtype=dtype)
        tmp_tgt_generation_mask = layers.concat([tmp_tgt_generation_mask, append_mask], axis=2)
        pre_mask = tmp_tgt_generation_mask = layers.gather(
            input=tmp_tgt_generation_mask, index=parent_idx)

        pre_sent = layers.fill_constant_batch_size_like(
            input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype)

        if self.continuous_position:
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype),
                y=step_idx, axis=0) + pos_bias
        else:
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype),
                y=step_idx, axis=0)

        if self.use_role:
            pre_role = layers.fill_constant_batch_size_like(
                input=pre_mask, value=0, shape=[-1, 1, 1], dtype=pre_ids.dtype)
        else:
            pre_role = None

        dec_out, _ = model._generation_network(
            token_ids=pre_ids,
            type_ids=pre_sent,
            pos_ids=pre_pos,
            role_ids=pre_role,
            generation_mask=tmp_tgt_generation_mask,
            gather_idx=parent_idx)
        logits = model._calc_logits(dec_out)

        # ignore unk and mask token
        if self.ignore_unk:
            logits = layers.elementwise_add(logits, token_penalty, axis=1)

        # min dec length
        min_len_cond = layers.less_than(x=step_idx, y=min_len)

        def min_len_penalty():
            """Plus minimum length penalty."""
            return layers.elementwise_add(logits, eos_penalty, axis=1)

        def no_penalty():
            """No penalty."""
            return logits

        logits = layers.case([(min_len_cond, min_len_penalty)], default=no_penalty)

        # get probs
        probs = layers.softmax(logits / self.temperature)

        if self.decoding_strategy == "beam_search":
            topk_scores, topk_indices = layers.topk(input=probs, k=beam_size)
        else:
            if self.decoding_strategy.startswith("sampling"):
                sampling_ids = layers.sampling_id(probs, dtype="int")
            elif self.decoding_strategy.startswith("topk_sampling"):
                topk_probs, _ = layers.topk(input=probs, k=self.topk)
                ge_cond = layers.cast(
                    layers.greater_equal(probs, layers.unsqueeze(topk_probs[:, -1], [1])),
                    "float32")
                old_probs = probs
                probs = probs * ge_cond / layers.reduce_sum(topk_probs, dim=-1, keep_dim=True)
                sampling_ids = layers.sampling_id(probs, dtype="int")
                probs = old_probs
            else:
                raise ValueError(self.decoding_strategy)

            sampling_scores = layers.one_hot(
                layers.unsqueeze(sampling_ids, [1]), probs.shape[1])
            sampling_scores = sampling_scores * probs - (1 - sampling_scores) * 1e3
            topk_scores, topk_indices = layers.topk(input=sampling_scores, k=1)

        pre_len = layers.cast(step_idx, "float32")
        layers.increment(x=step_idx, value=1.0, in_place=True)
        cur_len = layers.cast(step_idx, "float32")

        # update scores
        if self.length_average:
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores), y=pre_scores * pre_len, axis=0) / cur_len
        elif self.length_penalty > 0:
            pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty)
            cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty)
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores), y=pre_scores * pre_lp, axis=0) / cur_lp
        else:
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores), y=pre_scores, axis=0)

        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=beam_size,
            end_id=self.eos_id,
            return_parent_idx=True)

        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask)
        layers.array_write(pos_bias, i=step_idx, array=pos_biases)
        layers.assign(gather_idx, parent_idx)

        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=self.eos_id)

    predictions = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "token_ids": inputs["token_ids"],
        "data_id": inputs["data_id"]
    }
    return predictions

def __call__(self, kernel_preds, cls_preds, mask_protos,
             batch_gt_objs_tensors, batch_gt_clss_tensors,
             batch_gt_masks_tensors, batch_gt_pos_idx_tensors):
    '''
    :param kernel_preds: each element has shape [N, 256, seg_num_grid, seg_num_grid],
                         the convolution kernel predicted by each grid cell. From small to large receptive field.
    :param cls_preds: each element has shape [N, 80, seg_num_grid, seg_num_grid],
                      the class probabilities predicted by each grid cell, before sigmoid(). From small to large receptive field.
    :param mask_protos: [bs, 256, s4, s4]   mask prototypes
    :param batch_gt_objs_tensors: each element has shape [N, seg_num_grid, seg_num_grid, 1],
                                  the objectness of each grid cell. From small to large receptive field.
    :param batch_gt_clss_tensors: each element has shape [N, seg_num_grid, seg_num_grid, 80],
                                  the one-hot ground-truth class of each grid cell. From small to large receptive field.
    :param batch_gt_masks_tensors: each element has shape [N, -1, s4, s4], ground-truth masks. From small to large receptive field.
    :param batch_gt_pos_idx_tensors: each element has shape [N, -1, 3], indices of positive samples. From small to large receptive field.
    :return:
    '''
    batch_size = self.batch_size
    num_layers = len(kernel_preds)

    # ================= compute the losses =================
    num_ins = 0.  # number of positive samples in this batch of images
    loss_clss, loss_masks = [], []
    for bid in range(batch_size):
        for lid in range(num_layers):
            # ================ mask loss ======================
            mask_proto = mask_protos[bid]            # [256, s4, s4]  mask prototypes produced for this image
            kernel_pred = kernel_preds[lid][bid]     # [256, seg_num_grid, seg_num_grid]  per-grid predicted kernels (the "mask coefficients" of YOLACT)
            kernel_pred = L.transpose(kernel_pred, perm=[1, 2, 0])  # [seg_num_grid, seg_num_grid, 256]

            gt_objs = batch_gt_objs_tensors[lid][bid]    # [seg_num_grid, seg_num_grid, 1]
            gt_masks = batch_gt_masks_tensors[lid][bid]  # [-1, s4, s4]
            pmidx = batch_gt_pos_idx_tensors[lid][bid]   # [-1, 3]
            gt_objs.stop_gradient = True
            gt_masks.stop_gradient = True
            pmidx.stop_gradient = True

            idx_sum = L.reduce_sum(pmidx, dim=1)
            keep = L.where(idx_sum > -1)
            keep = L.reshape(keep, (-1, ))
            keep.stop_gradient = True

            pmidx = L.gather(pmidx, keep)  # [M, 3]
            yx_idx = pmidx[:, :2]          # [M, 2]
            m_idx = pmidx[:, 2]            # [M, ]
            yx_idx.stop_gradient = True
            m_idx.stop_gradient = True

            # gather them out
            gt_obj = L.gather_nd(gt_objs, yx_idx)       # [M, 1]    whether it is a genuine positive sample
            pos_krn = L.gather_nd(kernel_pred, yx_idx)  # [M, 256]  kernels (mask coefficients) of positive samples
            gt_mask = L.gather(gt_masks, m_idx)         # [M, s4, s4]  ground-truth masks

            # number of positive samples
            num_ins += L.reduce_sum(gt_obj)

            # build the predicted masks
            mask_proto = L.transpose(mask_proto, perm=[1, 2, 0])      # [s4, s4, 256]
            masks = L.matmul(mask_proto, pos_krn, transpose_y=True)  # [s4, s4, M]
            masks = L.sigmoid(masks)                    # [s4, s4, M]
            masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]
            loss_mask = self.dice_loss(masks, gt_mask, gt_obj)
            loss_masks.append(loss_mask)

            # ================ classification loss: sigmoid_focal_loss() ======================
            gamma = self.loss_gamma
            alpha = self.loss_alpha
            pred_conf = cls_preds[lid][bid]                     # [80, seg_num_grid, seg_num_grid]  before sigmoid()
            pred_conf = L.transpose(pred_conf, perm=[1, 2, 0])  # [seg_num_grid, seg_num_grid, 80]  before sigmoid()
            pred_conf = L.sigmoid(pred_conf)                    # [seg_num_grid, seg_num_grid, 80]  after sigmoid()
            gt_clss = batch_gt_clss_tensors[lid][bid]           # [seg_num_grid, seg_num_grid, 80]  one-hot ground-truth classes
            gt_clss.stop_gradient = True
            pos_loss = gt_clss * (0 - L.log(pred_conf + 1e-9)) * L.pow(1 - pred_conf, gamma) * alpha
            neg_loss = (1.0 - gt_clss) * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(pred_conf, gamma) * (1 - alpha)
            focal_loss = pos_loss + neg_loss
            focal_loss = L.reduce_sum(focal_loss, dim=[0, 1])
            loss_clss.append(focal_loss)

    loss_masks = L.concat(loss_masks, axis=0)
    loss_masks = L.reduce_sum(loss_masks) * self.ins_loss_weight
    loss_masks = loss_masks / L.elementwise_max(L.ones((1, ), dtype='float32'), num_ins)

    loss_clss = L.concat(loss_clss, axis=0)
    loss_clss = L.reduce_sum(loss_clss) * self.clss_loss_weight
    loss_clss = loss_clss / L.elementwise_max(L.ones((1, ), dtype='float32'), num_ins)

    loss_all = {"loss_masks": loss_masks, "loss_clss": loss_clss}
    return loss_all

def hyp_score(log_probs, length, length_penalty):
    lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty)
    return log_probs / lp

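# A NumPy sketch of the GNMT length penalty used above, lp = ((5 + len) / 6) ** alpha;
# dividing the cumulative log probability by lp keeps long hypotheses competitive
# with short ones (values are illustrative).
import numpy as np

def hyp_score_np(log_prob, length, alpha=0.6):
    lp = ((5.0 + length) / 6.0) ** alpha
    return log_prob / lp

print(hyp_score_np(-6.0, 10), hyp_score_np(-9.0, 20))  # the longer hypothesis may still win
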
def infilling_decode(self):
    if self.task_type == "dialog":
        emb_num = 4
    else:
        emb_num = 3
    input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                   [[-1, self.max_seq_len, self.max_seq_len]]
    input_dtypes = ['int64'] * emb_num + ['float32']
    input_lod_levels = [0] * emb_num + [0]

    shapes = input_shapes + [[-1, self.max_seq_len, 1], [-1, self.max_seq_len, 1],
                             [-1, 1], [-1], [-1, 1, self.max_seq_len], [-1, 1]]
    dtypes = input_dtypes + ['int64', 'int64', 'float32', 'int32', 'float32', 'int64']
    lod_levels = input_lod_levels + [2, 2, 2, 0, 0, 0]

    inputs = self.to_ternsor(shapes, dtypes, lod_levels)
    pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=50, iterable=False)

    emb_ids = {}
    for key, value in zip(self.emb_keys, inputs[:emb_num]):
        emb_ids[key] = value
    input_mask = inputs[emb_num]
    tgt_ids, tgt_pos, init_scores, parent_idx, tgt_input_mask, data_ids = inputs[-6:]

    ernie = ErnieModel(emb_ids=emb_ids,
                       input_mask=input_mask,
                       config=self.ernie_config,
                       use_fp16=self.use_fp16,
                       task_type=self.task_type,
                       decoding=True,
                       gather_idx=parent_idx)

    max_len = layers.fill_constant(shape=[1], dtype=tgt_ids.dtype, value=self.max_dec_len, force_cpu=True)
    step_idx = layers.fill_constant(shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    pos_idx = layers.fill_constant(shape=[1], dtype=tgt_ids.dtype, value=1, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)

    ids = layers.array_write(layers.reshape(tgt_ids, (-1, 1)), step_idx)
    pos_biases = layers.array_write(layers.reshape(tgt_pos, (-1, 1)), step_idx)
    scores = layers.array_write(init_scores, step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)

    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        pos_bias = layers.array_read(array=pos_biases, i=step_idx)
        pos_bias = layers.gather(input=pos_bias, index=parent_idx)
        tmp_mask = layers.array_read(tgt_masks, i=step_idx)

        def gen_batch_like(value, dtype="int64", shape=[-1, 1, 1], is_scalar=True):
            if is_scalar:
                return layers.fill_constant_batch_size_like(
                    input=parent_idx, value=value, shape=shape, dtype=dtype)
            else:
                return layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=parent_idx, value=1, shape=shape, dtype=dtype),
                    y=value, axis=0)

        tmp_mask = layers.gather(input=tmp_mask, index=parent_idx)
        append_0_mask = gen_batch_like(0.0, dtype=tmp_mask.dtype)
        append_1_mask = gen_batch_like(1.0, dtype=tmp_mask.dtype)
        tmp_mask = layers.concat([tmp_mask, append_1_mask], axis=2)
        pre_mask = layers.concat([tmp_mask, append_0_mask], axis=2)
        cur_mask = layers.concat([tmp_mask, append_1_mask], axis=2)

        cur_ids = gen_batch_like(self.attn_id)
        pre_pos = gen_batch_like(step_idx, is_scalar=False)
        cur_pos = gen_batch_like(pos_idx, is_scalar=False)
        if self.continuous_position:
            pre_pos = pre_pos + pos_bias
            cur_pos = cur_pos + pos_bias

        dec_emb_ids = {
            "word_embedding": layers.concat([pre_ids, cur_ids], axis=1),
            "pos_embedding": layers.concat([pre_pos, cur_pos], axis=1)
        }
        if self.task_type == "dialog":
            role_ids = gen_batch_like(0)
            turn_ids = gen_batch_like(0)
            dec_emb_ids["role_embedding"] = layers.concat([role_ids, role_ids], axis=1)
            dec_emb_ids["turn_embedding"] = layers.concat([turn_ids, turn_ids], axis=1)
        else:
            sent_ids = gen_batch_like(self.tgt_type_id)
            dec_emb_ids["sent_embedding"] = layers.concat([sent_ids, sent_ids], axis=1)

        dec_mask = layers.concat([pre_mask, cur_mask], axis=1)

        dec_out = ernie.encode(dec_emb_ids, dec_mask, parent_idx, remove_query=True)
        fc_out = self.cal_logit(dec_out[:, 1:, :], None)
        topk_scores, topk_indices = layers.topk(input=layers.softmax(fc_out), k=self.beam_size)

        pre_lenpen = layers.pow(
            (5.0 + layers.cast(step_idx, pre_scores.dtype)) / 6.0, self.length_penalty)
        cur_lenpen = layers.pow(
            (5.0 + layers.cast(pos_idx, pre_scores.dtype)) / 6.0, self.length_penalty)
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores), y=pre_scores * pre_lenpen, axis=0) / cur_lenpen

        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=self.beam_size,
            end_id=self.eos_idx,
            return_parent_idx=True)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.increment(x=pos_idx, value=1.0, in_place=True)
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.array_write(tmp_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pos_bias, i=step_idx, array=pos_biases)
        layers.assign(gather_idx, parent_idx)

        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

    graph_vars = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "data_ids": data_ids
    }
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars

def hyp_score(log_probs, length):
    factor = 1.
    lp = L.pow((5. + L.cast(length, 'float32')) / 6., factor)
    return log_probs / lp

            size=8,
            param_attr=ParamAttr(name="fc01_weights"),
            bias_attr=ParamAttr(name="fc01_bias"))
        fc02_out_tensor = fluid.layers.fc(
            input=fc01_out_tensor,
            size=8,
            param_attr=ParamAttr(name="fc02_weights"),
            bias_attr=ParamAttr(name="fc02_bias"))

        # Build the loss function
        y_true = P.data(name='y_true', shape=[-1, 8], append_batch_size=False, dtype='float32')
        # First square the element-wise differences; you can use the P.pow() op or Python's ** operator.
        mseloss = P.pow(y_true - fc02_out_tensor, 2)
        # mseloss = (y_true - bn01_out_tensor) ** 2   # the ** operator also works.
        mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss

        # Optimizer: SGD
        optimizer = fluid.optimizer.SGD(learning_rate=lr)
        optimizer.minimize(mseloss)

eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
    with fluid.unique_name.guard():
        # Rebuild the network with the same tensor names; the loss layers are not needed.
        inputs = P.data(name='input_1', shape=[-1, 3], append_batch_size=False, dtype='float32')

            stride=1,
            padding=1,
            param_attr=ParamAttr(name="conv02_weights"),
            bias_attr=ParamAttr(name="conv02_bias"))
        in_name = "in02"
        in02_out_tensor = innorm(conv02_out_tensor, name=in_name)
        act02_out_tensor = fluid.layers.leaky_relu(in02_out_tensor, alpha=0.1)

        # Build the loss function
        y_true = P.data(name='y_true', shape=[-1, 8, 28, 28], append_batch_size=False, dtype='float32')
        # First square the element-wise differences; you can use the P.pow() op or Python's ** operator.
        mseloss = P.pow(y_true - act02_out_tensor, 2)
        mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss

        # Optimizer: SGD
        optimizer = fluid.optimizer.SGD(learning_rate=lr)
        optimizer.minimize(mseloss)

eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
    with fluid.unique_name.guard():
        # Rebuild the network with the same tensor names; the loss layers are not needed.
        inputs = P.data(name='input_1', shape=[-1, 3, 28, 28], append_batch_size=False, dtype='float32')
        conv01_out_tensor = fluid.layers.conv2d(

def get_norm(indegree):
    float_degree = L.cast(indegree, dtype="float32")
    float_degree = L.clamp(float_degree, min=1.0)
    norm = L.pow(float_degree, factor=-0.5)
    return norm

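# A NumPy sketch of what this normalization is typically used for in a GCN: a message
# from node j to node i is scaled by deg(i)**-0.5 * deg(j)**-0.5, with degrees clamped
# to at least 1. The small graph below is illustrative.
import numpy as np

indegree = np.array([2, 1, 0], dtype=np.float32)       # per-node in-degree
norm = np.power(np.clip(indegree, 1.0, None), -0.5)    # deg**-0.5, clamped to >= 1
edges = [(0, 1), (1, 2)]                               # (src, dst) pairs
edge_weight = [norm[s] * norm[d] for s, d in edges]    # symmetric Laplacian weights
print(norm, edge_weight)
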
def __iou_loss(self, pred, targets, positive_mask, weights=None):
    """
    Calculate the loss for location prediction

    Args:
        pred (Variables): bounding boxes prediction
        targets (Variables): targets for positive samples
        positive_mask (Variables): mask of positive samples
        weights (Variables): weights for each positive sample

    Return:
        loss (Variables): location loss
    """
    positive_mask = fluid.layers.reshape(positive_mask, (-1, ))  # [batch_size * num_grids, ]
    plw = pred[:, 0] * positive_mask  # [batch_size * num_grids, ], predicted l
    pth = pred[:, 1] * positive_mask  # [batch_size * num_grids, ], predicted t
    prw = pred[:, 2] * positive_mask  # [batch_size * num_grids, ], predicted r
    pbh = pred[:, 3] * positive_mask  # [batch_size * num_grids, ], predicted b

    tlw = targets[:, 0] * positive_mask  # [batch_size * num_grids, ], ground-truth l
    tth = targets[:, 1] * positive_mask  # [batch_size * num_grids, ], ground-truth t
    trw = targets[:, 2] * positive_mask  # [batch_size * num_grids, ], ground-truth r
    tbh = targets[:, 3] * positive_mask  # [batch_size * num_grids, ], ground-truth b
    tlw.stop_gradient = True
    trw.stop_gradient = True
    tth.stop_gradient = True
    tbh.stop_gradient = True

    area_target = (tlw + trw) * (tth + tbh)   # [batch_size * num_grids, ], ground-truth area
    area_predict = (plw + prw) * (pth + pbh)  # [batch_size * num_grids, ], predicted area

    ilw = fluid.layers.elementwise_min(plw, tlw)  # [batch_size * num_grids, ], l of the intersection box
    irw = fluid.layers.elementwise_min(prw, trw)  # [batch_size * num_grids, ], r of the intersection box
    ith = fluid.layers.elementwise_min(pth, tth)  # [batch_size * num_grids, ], t of the intersection box
    ibh = fluid.layers.elementwise_min(pbh, tbh)  # [batch_size * num_grids, ], b of the intersection box

    clw = fluid.layers.elementwise_max(plw, tlw)  # [batch_size * num_grids, ], l of the enclosing box
    crw = fluid.layers.elementwise_max(prw, trw)  # [batch_size * num_grids, ], r of the enclosing box
    cth = fluid.layers.elementwise_max(pth, tth)  # [batch_size * num_grids, ], t of the enclosing box
    cbh = fluid.layers.elementwise_max(pbh, tbh)  # [batch_size * num_grids, ], b of the enclosing box

    area_inter = (ilw + irw) * (ith + ibh)  # [batch_size * num_grids, ], intersection area
    ious = (area_inter + 1.0) / (area_predict + area_target - area_inter + 1.0)
    ious = ious * positive_mask

    if self.iou_loss_type.lower() == "linear_iou":
        loss = 1.0 - ious
    elif self.iou_loss_type.lower() == "giou":
        area_uniou = area_predict + area_target - area_inter
        area_circum = (clw + crw) * (cth + cbh) + 1e-7
        giou = ious - (area_circum - area_uniou) / area_circum
        loss = 1.0 - giou
    elif self.iou_loss_type.lower() == "iou":
        loss = 0.0 - fluid.layers.log(ious)
    elif self.iou_loss_type.lower() == "ciou":
        # Predicted box in cx_cy_w_h format, with the grid-cell center as the origin.
        pred_cx = (prw - plw) * 0.5
        pred_cy = (pbh - pth) * 0.5
        pred_w = (plw + prw)
        pred_h = (pth + pbh)
        pred_cx = L.reshape(pred_cx, (-1, 1))
        pred_cy = L.reshape(pred_cy, (-1, 1))
        pred_w = L.reshape(pred_w, (-1, 1))
        pred_h = L.reshape(pred_h, (-1, 1))
        pred_cx_cy_w_h = L.concat([pred_cx, pred_cy, pred_w, pred_h], -1)  # [batch_size * num_grids, 4]

        # Ground-truth box in cx_cy_w_h format, with the grid-cell center as the origin.
        true_cx = (trw - tlw) * 0.5
        true_cy = (tbh - tth) * 0.5
        true_w = (tlw + trw)
        true_h = (tth + tbh)
        true_cx = L.reshape(true_cx, (-1, 1))
        true_cy = L.reshape(true_cy, (-1, 1))
        true_w = L.reshape(true_w, (-1, 1))
        true_h = L.reshape(true_h, (-1, 1))
        true_cx_cy_w_h = L.concat([true_cx, true_cy, true_w, true_h], -1)  # [batch_size * num_grids, 4]

        # Predicted box in x0y0x1y1 format, with the grid-cell center as the origin.
        boxes1_x0y0x1y1 = L.concat([pred_cx_cy_w_h[:, :2] - pred_cx_cy_w_h[:, 2:] * 0.5,
                                    pred_cx_cy_w_h[:, :2] + pred_cx_cy_w_h[:, 2:] * 0.5], axis=-1)
        # Ground-truth box in x0y0x1y1 format, with the grid-cell center as the origin.
        boxes2_x0y0x1y1 = L.concat([true_cx_cy_w_h[:, :2] - true_cx_cy_w_h[:, 2:] * 0.5,
                                    true_cx_cy_w_h[:, :2] + true_cx_cy_w_h[:, 2:] * 0.5], axis=-1)

        # Top-left and bottom-right corners of the enclosing box, both of shape (batch_size * num_grids, 2)
        enclose_left_up = L.elementwise_min(boxes1_x0y0x1y1[:, :2], boxes2_x0y0x1y1[:, :2])
        enclose_right_down = L.elementwise_max(boxes1_x0y0x1y1[:, 2:], boxes2_x0y0x1y1[:, 2:])

        # Squared diagonal of the enclosing box
        enclose_wh = enclose_right_down - enclose_left_up
        enclose_c2 = L.pow(enclose_wh[:, 0], 2) + L.pow(enclose_wh[:, 1], 2)

        # Squared distance between the two box centers
        p2 = L.pow(pred_cx_cy_w_h[:, 0] - true_cx_cy_w_h[:, 0], 2) \
            + L.pow(pred_cx_cy_w_h[:, 1] - true_cx_cy_w_h[:, 1], 2)

        # Add the a*v term, with a small constant guarding against division by zero (nan).
        atan1 = L.atan(pred_cx_cy_w_h[:, 2] / (pred_cx_cy_w_h[:, 3] + 1e-9))
        atan2 = L.atan(true_cx_cy_w_h[:, 2] / (true_cx_cy_w_h[:, 3] + 1e-9))
        v = 4.0 * L.pow(atan1 - atan2, 2) / (math.pi ** 2)
        a = v / (1 - ious + v)
        ciou = ious - 1.0 * p2 / (enclose_c2 + 1e-9) - 1.0 * a * v
        loss = 1.0 - ciou
    else:
        raise KeyError

    loss = fluid.layers.reshape(loss, (-1, 1))  # [batch_size * num_grids, 1]
    if weights is not None:
        loss = loss * weights
    return loss

def beam_search():
    """Beam search function"""

    max_len = layers.fill_constant(shape=[1], dtype=start_tokens.dtype,
                                   value=self.max_out_len, force_cpu=True)
    min_len = layers.fill_constant(shape=[1], dtype=start_tokens.dtype,
                                   value=self.min_out_len)
    neg_inf = layers.fill_constant(shape=[1], dtype='float32', value=-INF)
    step_idx = layers.fill_constant(shape=[1], dtype=start_tokens.dtype,
                                    value=0, force_cpu=True)
    step_next_idx = layers.fill_constant(shape=[1], dtype=start_tokens.dtype,
                                         value=1, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)  # default force_cpu=True
    while_op = layers.While(cond)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)), step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwritten at each step.
    # caches contains states of history steps in decoder self-attention
    # and static encoder output projections in encoder-decoder attention
    # to reduce redundant computation.
    caches = [
        {
            "k":  # for self attention
                layers.fill_constant_batch_size_like(
                    input=start_tokens,
                    shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                    dtype=enc_words_output.dtype,
                    value=0),
            "v":  # for self attention
                layers.fill_constant_batch_size_like(
                    input=start_tokens,
                    shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                    dtype=enc_words_output.dtype,
                    value=0),
            "static_k_word":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_words_output.dtype),
            "static_v_word":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_words_output.dtype),
            "static_k_sent":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_sents_output.dtype),
            "static_v_sent":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_sents_output.dtype)
        } for i in range(self._dec_n_layer)
    ]

    trigram_blocking = TrigramBlocking(start_tokens, self.tokenizer,
                                       use_fp16=self._use_fp16, beam_size=self.beam_size)

    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        # Since beam_search_op doesn't enforce pre_ids' shape, we can do
        # inplace reshape here which actually changes the shape of pre_ids.
        # pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)

        # gather cell states corresponding to selected parent
        pre_src_words_attn_bias = layers.gather(tgt_src_words_attn_bias, index=parent_idx)
        pre_src_sents_attn_bias = layers.gather(tgt_src_sents_attn_bias, index=parent_idx)
        pre_graph_attn_bias = layers.gather(graph_attn_bias, index=parent_idx)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_src_sents_attn_bias,  # can't use lod tensor here
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype),
            y=step_idx,
            axis=0)

        logits = self.decode(
            dec_input=(pre_ids, pre_pos, None, pre_src_words_attn_bias,
                       pre_src_sents_attn_bias, pre_graph_attn_bias),
            enc_words_output=enc_words_output,
            enc_sents_output=enc_sents_output,
            caches=caches,
            gather_idx=parent_idx)

        # prevent generating end token if length less than min_out_len
        eos_index = layers.fill_constant(shape=[layers.shape(logits)[0]],
                                         dtype='int64',
                                         value=self.eos_idx)
        eos_index = fluid.one_hot(eos_index, depth=self.voc_size)
        less_cond = layers.cast(layers.less_than(x=step_idx, y=min_len), dtype='float32')
        less_val = layers.elementwise_mul(less_cond, neg_inf)
        eos_val = layers.elementwise_mul(eos_index, less_val, axis=0)
        revised_logits = layers.elementwise_add(logits, eos_val, axis=0)

        # topK reduction across beams, also contains special handling of
        # end beams and end sentences (batch reduction)
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(revised_logits), k=self.beam_size)

        # Roll back previous scores for the length penalty:
        # previous scores have already been length-penalized, so before applying this
        # timestep's penalty we need to roll that back. Because of this we store the
        # length-penalized score in `scores` while calculating with the un-penalized score.
        # -> safe for step_idx == 0 (initialization state), because previous-score == 0
        pre_timestep_length_penalty = fluid.layers.pow(
            ((5.0 + fluid.layers.cast(step_idx, pre_scores.dtype)) / 6.0),
            self.len_penalty)
        pre_scores_wo_len_penalty = fluid.layers.elementwise_mul(
            pre_scores, pre_timestep_length_penalty)

        # calc trigram-blocking delta scores for current alive sequence
        if self.block_trigram:
            trigram_blocking.update_seq(pre_ids, parent_idx)
            trigram_blocking.expand_cand_seq(topk_indices)
            fluid.layers.py_func(
                func=trigram_blocking.blocking_forward,
                x=[trigram_blocking.cand_seq, trigram_blocking.id2is_full_token],
                out=trigram_blocking.delta_score_out,
                backward_func=None)
            layers.Print(trigram_blocking.delta_score_out,
                         summarize=100,
                         message="trigram_blocking.delta_score_out")
            pre_scores_wo_len_penalty = fluid.layers.elementwise_add(
                x=trigram_blocking.delta_score_out,
                y=pre_scores_wo_len_penalty,
                axis=0)

        # => [N, topk]
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores), y=pre_scores_wo_len_penalty, axis=0)
        cur_timestep_length_penalty = layers.pow(
            ((5.0 + layers.cast(step_next_idx, accu_scores.dtype)) / 6.0),
            self.len_penalty)
        curr_scores = layers.elementwise_div(accu_scores, cur_timestep_length_penalty)

        # beam_search op uses lod to differentiate branches.
        curr_scores = layers.lod_reset(curr_scores, pre_ids)
        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=curr_scores,
            beam_size=self.beam_size,
            end_id=self.eos_idx,
            return_parent_idx=True)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.increment(x=step_next_idx, value=1.0, in_place=True)
        # cell states (caches) have been updated in wrap_decoder,
        # only need to update beam search states here.
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.assign(gather_idx, parent_idx)
        layers.assign(pre_src_words_attn_bias, tgt_src_words_attn_bias)
        layers.assign(pre_src_sents_attn_bias, tgt_src_sents_attn_bias)
        layers.assign(pre_graph_attn_bias, graph_attn_bias)

        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

    return finished_ids, finished_scores

def mse_loss(pred, label):
    loss = layers.pow((pred - label), 2)
    loss = layers.mean(loss)
    return loss

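# A one-line NumPy check of the same mean-squared-error computation
# (random inputs, illustrative only).
import numpy as np

pred, label = np.random.rand(4), np.random.rand(4)
print(np.mean((pred - label) ** 2))
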
def _calc_obj_loss(self, output, obj, tobj, gt_box, batch_size, anchors,
                   num_classes, downsample, ignore_thresh, scale_x_y):
    # A prediction bbox overlap any gt_bbox over ignore_thresh,
    # objectness loss will be ignored, process as follows:

    # 1. get pred bbox, which is same with YOLOv3 infer mode, use yolo_box here
    # NOTE: img_size is set as 1.0 to get normalized pred bbox
    bbox, prob = fluid.layers.yolo_box(
        x=output,
        img_size=fluid.layers.ones(shape=[batch_size, 2], dtype="int32"),
        anchors=anchors,
        class_num=num_classes,
        conf_thresh=0.,
        downsample_ratio=downsample,
        clip_bbox=False,
        scale_x_y=scale_x_y)

    # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
    #    and gt bbox in each sample
    if batch_size > 1:
        # bbox: [N, 3*n_grid*n_grid, 4]
        # gt_box: [N, 50, 4]
        preds = fluid.layers.split(bbox, batch_size, dim=0)  # each element has shape [1, 3*n_grid*n_grid, 4]
        gts = fluid.layers.split(gt_box, batch_size, dim=0)  # each element has shape [1, 50, 4]
    else:
        preds = [bbox]
        gts = [gt_box]
        probs = [prob]
    ious = []
    for pred, gt in zip(preds, gts):
        # pred: [1, 3*n_grid*n_grid, 4]
        # gt:   [1, 50, 4]
        def box_xywh2xyxy(box):
            x = box[:, 0]
            y = box[:, 1]
            w = box[:, 2]
            h = box[:, 3]
            return fluid.layers.stack(
                [x - w / 2., y - h / 2., x + w / 2., y + h / 2.], axis=1)

        pred = fluid.layers.squeeze(pred, axes=[0])              # [3*n_grid*n_grid, 4]
        gt = box_xywh2xyxy(fluid.layers.squeeze(gt, axes=[0]))   # [50, 4], converted to x0y0x1y1 format
        ious.append(fluid.layers.iou_similarity(pred, gt))       # [3*n_grid*n_grid, 50] pairwise IoU between the two sets of boxes

    iou = fluid.layers.stack(ious, axis=0)  # [N, 3*n_grid*n_grid, 50] pairwise IoU between the two sets of boxes

    # 3. Get iou_mask by IoU between gt bbox and prediction bbox,
    #    Get obj_mask by tobj(holds gt_score), calculate objectness loss
    max_iou = fluid.layers.reduce_max(iou, dim=-1)  # [N, 3*n_grid*n_grid] highest IoU of each prediction with all gt in this image
    iou_mask = fluid.layers.cast(max_iou <= ignore_thresh, dtype="float32")  # [N, 3*n_grid*n_grid] 1 at candidate negative samples
    if self.match_score:
        max_prob = fluid.layers.reduce_max(prob, dim=-1)
        iou_mask = iou_mask * fluid.layers.cast(max_prob <= 0.25, dtype="float32")
    output_shape = fluid.layers.shape(output)
    an_num = len(anchors) // 2
    iou_mask = fluid.layers.reshape(
        iou_mask, (-1, an_num, output_shape[2], output_shape[3]))  # [N, 3, n_grid, n_grid] 1 at candidate negative samples
    iou_mask.stop_gradient = True

    # NOTE: tobj holds gt_score, obj_mask holds object existence mask
    obj_mask = fluid.layers.cast(tobj > 0., dtype="float32")  # [N, 3, n_grid, n_grid] 1 at positive samples
    obj_mask.stop_gradient = True

    noobj_mask = (1.0 - obj_mask) * iou_mask  # [N, 3, n_grid, n_grid] 1 at negative samples
    noobj_mask.stop_gradient = True

    # For positive objectness grids, objectness loss should be calculated
    # For negative objectness grids, objectness loss is calculated only iou_mask == 1.0
    pred_conf = L.sigmoid(obj)
    if self.focalloss_on_obj:
        alpha = self.focalloss_alpha
        gamma = self.focalloss_gamma
        pos_loss = tobj * (0 - L.log(pred_conf + 1e-9)) * L.pow(1 - pred_conf, gamma) * alpha
        neg_loss = noobj_mask * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(pred_conf, gamma) * (1 - alpha)
    else:
        pos_loss = tobj * (0 - L.log(pred_conf + 1e-9))
        neg_loss = noobj_mask * (0 - L.log(1 - pred_conf + 1e-9))
    pos_loss = fluid.layers.reduce_sum(pos_loss, dim=[1, 2, 3])
    neg_loss = fluid.layers.reduce_sum(neg_loss, dim=[1, 2, 3])

    return pos_loss, neg_loss

def get_norm(indegree): """Get Laplacian Normalization""" float_degree = L.cast(indegree, dtype="float32") float_degree = L.clamp(float_degree, min=1.0) norm = L.pow(float_degree, factor=-0.5) return norm
        conv02_out_tensor = fluid.layers.conv2d(
            input=conv01_out_tensor,
            num_filters=8,
            filter_size=3,
            stride=1,
            padding=1,
            param_attr=ParamAttr(name="conv02_weights"),
            bias_attr=ParamAttr(name="conv02_bias"))

        # Build the loss function
        y_true = P.data(name='y_true', shape=[-1, 8, 28, 28], append_batch_size=False, dtype='float32')
        # First square the element-wise differences; you can use the P.pow() op or Python's ** operator.
        mseloss = P.pow(y_true - conv02_out_tensor, 2)
        mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss

        # Optimizer: SGD
        optimizer = fluid.optimizer.SGD(learning_rate=lr)
        optimizer.minimize(mseloss)

eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
    with fluid.unique_name.guard():
        # Rebuild the network with the same tensor names; the loss layers are not needed.
        inputs = P.data(name='input_1', shape=[-1, 3, 28, 28], append_batch_size=False, dtype='float32')
        conv01_out_tensor = fluid.layers.conv2d(

    print('------------------ step %d ------------------' % step)

    # ==================== train ====================
    batch_data = np.random.normal(loc=0, scale=1, size=(2, 3, 28, 28)).astype(np.float32)
    y_true_arr = np.random.normal(loc=0, scale=1, size=(2, 8, 28, 28)).astype(np.float32)
    batch_data2 = paddle.to_tensor(batch_data, place=place)
    y_true_arr2 = paddle.to_tensor(y_true_arr, place=place)

    paddle_conv01_out, paddle_bn01_out, paddle_act01_out, paddle_conv02_out, \
        paddle_bn02_out, paddle_act02_out = model(batch_data2)

    # Build the loss function.
    # First square the element-wise differences; you can use the P.pow() op or Python's ** operator.
    mseloss = P.pow(y_true_arr2 - paddle_act02_out, 2)
    mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss
    paddle_mseloss_out = mseloss.numpy()
    paddle_bn01_out = paddle_bn01_out.numpy()
    paddle_bn02_out = paddle_bn02_out.numpy()

    # Update the weights
    mseloss.backward()
    optimizer.step()
    optimizer.clear_grad()

    print('train_forward:')
    # Simulate the training step in plain Python and check it against Paddle's outputs;
    # we expect the same results as PaddlePaddle.
    my_conv01_out = conv01.train_forward(batch_data)
    my_bn01_out = bn01.train_forward(my_conv01_out)