def sample_from_mog(self, y):
    """Sample from the output distribution where the output distribution is a
    mixture of Gaussians.

    Args:
        y (Variable): shape(B, T, C_output), dtype float32, the parameters of
            the output distribution. It is the concatenation of 3 parts, the
            logits of every distribution, the mean of each distribution and the
            log standard deviation of each distribution. Each part's shape is
            (B, T, n_mixture), where `n_mixture` means the number of Gaussians
            in the mixture.

    Returns:
        Variable: shape(B, T), waveform sampled from the output distribution.
    """
    batch_size, time_steps, output_dim = y.shape
    # The channel dim packs [logits | means | log stds], one third each.
    n_mixture = output_dim // 3

    w, mu, log_std = F.split(y, 3, dim=-1)

    # Pick one mixture component per (batch, time) position by sampling from
    # the softmax over the component logits.
    reshaped_w = F.reshape(w, (batch_size * time_steps, n_mixture))
    prob_ids = F.sampling_id(F.softmax(reshaped_w))
    prob_ids = F.reshape(prob_ids, (batch_size, time_steps))
    prob_ids = prob_ids.numpy()

    # Build (b, t, component) index triples on the host; O(B*T) Python loop,
    # acceptable here because sampling runs outside the training hot path.
    index = np.array([[[b, t, prob_ids[b, t]] for t in range(time_steps)]
                      for b in range(batch_size)]).astype("int32")
    index_var = dg.to_variable(index)

    # Gather the chosen component's mean / log-std and sample from it.
    mu_ = F.gather_nd(mu, index_var)
    log_std_ = F.gather_nd(log_std, index_var)

    dist = D.Normal(mu_, F.exp(log_std_))
    samples = dist.sample(shape=[])
    # Clip to the valid waveform range.
    samples = F.clip(samples, min=-1., max=1.)
    return samples
def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty):
    """Run one step of beam search.

    logits.shape == [B*W, V]

    Args:
        state (BeamSearchState): fields ``log_probs``/``lengths``/``finished``,
            each shaped [B, W].
        logits: [B*W, V] next-token logits for every live beam.
        eos_id (int): id of the end-of-sentence token.
        beam_width (int): number of beams W (re-read from ``state`` below).
        is_first_step (bool): when True only beam 0 is scored, since all beams
            start out identical.
        length_penalty: passed through to ``hyp_score``.

    Returns:
        (BeamSearchOutput, BeamSearchState): this step's scores / predicted ids /
        parent beam ids, and the updated search state.
    """
    _, vocab_size = logits.shape
    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(
        F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    # Finished beams only propagate probability through EOS.
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    # Length bookkeeping: finished beams stop growing, and emitting EOS itself
    # does not count toward hypothesis length.
    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    # Flatten beams so top-k runs over all W*V candidates per batch row.
    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)

    if is_first_step:
        # first step: only consider beam 0, the other beams are duplicates
        allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]

    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    # Decompose each flat candidate index into (parent beam, word).
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    # L.where(idx != -1)[:, :1] yields the batch-row coordinate for gather_nd.
    gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    # gather new beam state according to new beam id
    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape)

    # A beam becomes (or stays) finished once it emits EOS.
    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(
        log_probs=next_probs, lengths=next_len, finished=next_finished)
    output = BeamSearchOutput(
        scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id)

    return output, next_state
def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty, unk_id=17963):
    """Run one step of beam search, suppressing the [UNK] token.

    logits.shape == [B*W, V]

    Args:
        state (BeamSearchState): fields ``log_probs``/``lengths``/``finished``,
            each shaped [B, W].
        logits: [B*W, V] next-token logits. Batch size is 1 in this hub
            module, so the leading dim equals the beam width.
        eos_id (int): id of the end-of-sentence token.
        beam_width (int): number of beams W (re-read from ``state`` below).
        is_first_step (bool): when True only beam 0 is scored, since all beams
            start out identical.
        length_penalty: passed through to ``hyp_score``.
        unk_id (int): vocabulary id whose logit is zeroed before softmax to
            discourage emitting it. Defaults to 17963, the [UNK] id previously
            hard-coded here.

    Returns:
        (BeamSearchOutput, BeamSearchState): this step's scores / predicted ids /
        parent beam ids, and the updated search state.
    """
    _, vocab_size = logits.shape

    # Zero the [UNK] logit for every row in one vectorized write instead of a
    # per-row Python loop. NOTE: a zero logit only lowers [UNK]'s softmax
    # probability relative to large positive logits; it does not make it 0.
    logits_np = logits.numpy()
    logits_np[:, unk_id] = 0
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(
        F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    # Finished beams only propagate probability through EOS.
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    # Length bookkeeping: finished beams stop growing, and emitting EOS itself
    # does not count toward hypothesis length.
    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    # Flatten beams so top-k runs over all W*V candidates per batch row.
    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)

    if is_first_step:
        # first step: only consider beam 0, the other beams are duplicates
        allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]

    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    # Decompose each flat candidate index into (parent beam, word).
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    # L.where(idx != -1)[:, :1] yields the batch-row coordinate for gather_nd.
    gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    # gather new beam state according to new beam id
    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape)

    # A beam becomes (or stays) finished once it emits EOS.
    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(
        log_probs=next_probs, lengths=next_len, finished=next_finished)
    output = BeamSearchOutput(
        scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id)

    return output, next_state
def fast_nms(self, boxes, scores, masks, max_num_detections=100):
    """Fast NMS: suppress per-class overlaps with a single IOU-matrix pass.

    Args:
        boxes: candidate boxes, gathered per class below into
            [num_classes, num_dets, 4].
        scores: per-class scores, sorted along axis 1 below.
        masks: per-candidate mask coefficients, gathered alongside boxes.
        max_num_detections: final number of detections kept across all classes.

    Returns:
        (boxes, masks, classes, scores) for the kept detections.
    """
    iou_threshold = self.nms_thresh
    top_k = self.top_k

    # Sort each class's boxes by score in descending order and keep top_k.
    scores, idx = P.argsort(scores, axis=1, descending=True)
    idx = idx[:, :top_k]
    scores = scores[:, :top_k]

    num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]

    idx = P.reshape(idx, (-1, ))
    boxes = P.gather(boxes, idx)
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))
    masks = P.gather(masks, idx)
    masks = P.reshape(masks, (num_classes, num_dets, -1))

    # Build a c×n×n IOU matrix: each n×n slice holds the pairwise IOUs of that
    # class's n candidates.
    iou = jaccard(boxes, boxes)
    # Since IOU(A, A) = 1 and IOU(A, B) = IOU(B, A), zero the diagonal and the
    # lower triangle of every channel so each box only sees its IOU with
    # higher-scored boxes.
    rows = P.range(0, num_dets, 1, 'int32')
    cols = P.range(0, num_dets, 1, 'int32')
    rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
    cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
    tri_mask = P.cast(rows > cols, 'float32')
    tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)), [num_classes, 1, 1])
    iou = tri_mask * iou
    iou_max = P.reduce_max(iou, dim=1)

    # Now just filter out the ones higher than the threshold
    keep = P.where(iou_max <= iou_threshold)

    # Assign each kept detection to its corresponding class
    classes = P.range(0, num_classes, 1, 'int32')
    classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(classes, keep)
    boxes = P.gather_nd(boxes, keep)
    masks = P.gather_nd(masks, keep)
    scores = P.gather_nd(scores, keep)

    # Only keep the top max_num_detections highest scores across all classes
    scores, idx = P.argsort(scores, axis=0, descending=True)
    idx = idx[:max_num_detections]
    scores = scores[:max_num_detections]

    classes = P.gather(classes, idx)
    boxes = P.gather(boxes, idx)
    masks = P.gather(masks, idx)

    return boxes, masks, classes, scores
def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k): ''' :param boxes: [?, 4] :param scores: [80, ?] ''' # 同类方框根据得分降序排列 scores, idx = P.argsort(scores, axis=1, descending=True) idx = idx[:, :keep_top_k] scores = scores[:, :keep_top_k] num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1] idx = P.reshape(idx, (-1, )) boxes = P.gather(boxes, idx) boxes = P.reshape(boxes, (num_classes, num_dets, 4)) # 计算一个c×n×n的IOU矩阵,其中每个n×n矩阵表示对该类n个候选框,两两之间的IOU iou = _iou(boxes, boxes) # 因为自己与自己的IOU=1,IOU(A,B)=IOU(B,A),所以对上一步得到的IOU矩阵 # 进行一次处理。具体做法是将每一个通道,的对角线元素和下三角部分置为0 rows = P.range(0, num_dets, 1, 'int32') cols = P.range(0, num_dets, 1, 'int32') rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1]) cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets]) tri_mask = P.cast(rows > cols, 'float32') tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)), [num_classes, 1, 1]) iou = tri_mask * iou iou_max = P.reduce_max(iou, dim=1) # 同一类别,n个框与“分数比它高的框”的最高iou超过nms_thresh的话,就丢弃。下标是0的框肯定被保留。 keep = P.where(iou_max <= nms_thresh) # Assign each kept detection to its corresponding class classes = P.range(0, num_classes, 1, 'int32') classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets]) classes = P.gather_nd(classes, keep) boxes = P.gather_nd(boxes, keep) scores = P.gather_nd(scores, keep) # Only keep the top cfg.max_num_detections highest scores across all classes scores, idx = P.argsort(scores, axis=0, descending=True) idx = idx[:nms_top_k] scores = scores[:nms_top_k] classes = P.gather(classes, idx) boxes = P.gather(boxes, idx) return boxes, scores, classes
def _get_pooled_output(self, enc_out, idx=None, name="pooled"):
    """Get pooled output of the last output embedding in Transformer.

    Args:
        enc_out: the output embeddings of Transformer, shape is
            [batch_size, max_seq_len, hidden_size]
        idx (optional): the selected indices in pooling operator, shape is
            [batch_size, 1] or [batch_size, 2].
        name: a string, the name of the pooling layer.

    Returns:
        pooled_out: the pooled output embedding, shape is
            [batch_size, hidden_size].

    Raises:
        ValueError: if ``idx`` has neither of the supported shapes.
    """
    if idx is None:
        # Default: pool the first token of every sequence ([CLS]-style).
        feat = enc_out[:, 0]
    elif len(idx.shape) == 2 and idx.shape[1] == 1:
        # NOTE(review): squeezing axis 1 of [batch, seq, hidden] is only valid
        # when seq_len == 1 — confirm callers pass single-step enc_out here.
        enc_out = layers.squeeze(enc_out, [1])
        feat = layers.gather(input=enc_out, index=idx)
    elif len(idx.shape) == 2 and idx.shape[1] == 2:
        # [batch, 2] rows are (batch_row, position) pairs.
        feat = layers.gather_nd(input=enc_out, index=idx)
    else:
        raise ValueError(f"Invalid indices shape {idx.shape} is used")

    # Project to hidden_size with tanh activation; parameters are named so the
    # layer can be shared/restored by name.
    pooled_out = layers.fc(
        input=feat,
        size=self.hidden_size,
        act="tanh",
        param_attr=fluid.ParamAttr(name=f"{name}_fc.w_0",
                                   initializer=self.param_initializer),
        bias_attr=f"{name}_fc.b_0")
    return pooled_out
def no_nms(bboxes, scores, score_threshold, keep_top_k):
    """Score-threshold filtering plus global top-k, without any NMS.

    Transposes ``scores`` to [num_boxes, num_classes], keeps every
    (box, class) pair above ``score_threshold``, optionally truncates to the
    ``keep_top_k`` highest scores, and packs the result as rows of
    [label, score, x0, y0, x1, y1]. Returns a single all ``-1`` row of shape
    (0, 6) when nothing survives the threshold.
    """
    scores = L.transpose(scores, [1, 0])
    hits = L.where(scores > score_threshold)
    if len(hits) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    kept_scores = L.gather_nd(scores, hits)
    kept_labels = hits[:, 1]
    kept_boxes = L.gather(bboxes, hits[:, 0])

    # Order by score (descending) and truncate when a positive cap is given.
    _, order = L.argsort(kept_scores, descending=True)
    if keep_top_k > 0 and len(order) > keep_top_k:
        order = order[:keep_top_k]
    kept_boxes = L.gather(kept_boxes, order)
    kept_scores = L.gather(kept_scores, order)
    kept_labels = L.gather(kept_labels, order)

    # Assemble [label, score, box] rows.
    kept_scores = L.unsqueeze(kept_scores, 1)
    kept_labels = L.cast(L.unsqueeze(kept_labels, 1), 'float32')
    return L.concat([kept_labels, kept_scores, kept_boxes], 1)
def masked_select(input, mask):
    """Gather the elements of ``input`` at positions where ``mask`` is true.

    Args:
        input: input matrix
        mask: boolean matrix of the same shape as ``input``

    Returns:
        1-D tensor of the selected values, in row-major order.

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> mask
    [
        [True, True, False],
        [True, False, False]
    ]
    >>> masked_select(input, mask)
    [1, 2, 4]
    """
    # `where` yields the coordinates of true entries; gather_nd reads them out.
    coords = layers.where(mask)
    return layers.gather_nd(input, coords)
def forward(self, src_ids, *args, **kwargs):
    """Run the ERNIE encoder and, depending on the inputs, return encodings,
    greedy predictions, or the masked-LM training loss.

    Args:
        src_ids: source token ids, forwarded to ``ErnieModel.forward``.
        tgt_labels (kwarg, optional): target labels for the masked positions;
            reshaped to [-1, 1] when 1-D. Soft labels are detected from the
            last dim being != 1.
        tgt_pos (kwarg, optional): [n_targets, 2] indices of target positions,
            used with ``gather_nd`` on the encoder output.
        encode_only (kwarg, bool): when True, skip the LM head entirely.

    Returns:
        (None, None, info) when ``encode_only``;
        (output_ids, logits, info) when no ``tgt_labels`` given;
        (loss, logits_2d, info) otherwise.
    """
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)

    _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)

    if encode_only:
        return None, None, info
    elif tgt_labels is None:
        # Inference over all positions: project every encoding to the
        # vocabulary (weights tied to the word embedding) and take argmax.
        encoded = self.mlm(encoded)
        encoded = self.mlm_ln(encoded)
        logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        output_ids = L.argmax(logits, -1)
        return output_ids, logits, info
    else:
        # Training: score only the target positions.
        encoded_2d = L.gather_nd(encoded, tgt_pos)
        encoded_2d = self.mlm(encoded_2d)
        encoded_2d = self.mlm_ln(encoded_2d)
        logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        if len(tgt_labels.shape) == 1:
            tgt_labels = L.reshape(tgt_labels, [-1, 1])

        # A trailing dim != 1 means the labels are soft distributions.
        loss = L.reduce_mean(
            L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
        )
        return loss, logits_2d, info
def batch_scatter(ref, indices, updates, in_place=False, overwrite=False):
    """Scatter updates to ref, according to corrensponding index in indices
    in each batch. Currently, it only support 2d Tensor.

    Args:
        ref (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1]
        updates (Variable): with shape [batch_size]
        in_place (bool): if True, scatter result will be assign to ref. otherwise,
                         a new Tensor will be returned. Default is False.
        overwrite (bool): if True, scatter will over write corrensponding elements.
                          Default is False.

    Returns:
        Variable: same shape and dtype as ``ref``, with ``updates`` scattered
        at the indexed position of each batch row. When ``in_place`` is True
        the result is assigned back into ``ref`` and ``ref`` is returned.

    Raises:
        NULL

    Examples:
        ref
            [[1, 1, 1],
             [1, 1, 1]]
        indices
            [[2], [1]]
        updates
            [2, 3]

        return
            [[1, 1, 2],
             [1, 3, 1]]
    """
    ref_dtype = ref.dtype
    # scatter_nd_add works on floats; round-trip integer inputs through float32.
    if ref_dtype not in PaddleVarType.floats:
        ref_in = layers.cast(ref, dtype='float32')
    else:
        ref_in = ref

    if updates.dtype != ref_in.dtype:
        updates = layers.cast(updates, dtype=ref_in.dtype)

    # Pair every batch row index with its target column: coord[i] = [i, indices[i]].
    batch_size = layers.cast(layers.shape(ref_in)[0], dtype=indices.dtype)
    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    batch_indices = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=indices.dtype), [1])
    coord = layers.concat([batch_indices, indices], axis=1)

    if overwrite:
        # Emulate overwrite with add-only scatter: first add the negation of the
        # current values (zeroing them), then add the updates.
        mask = layers.gather_nd(ref_in, coord)
        mask = layers.elementwise_sub(layers.zeros_like(mask), mask)
        ref_in = layers.scatter_nd_add(ref_in, coord, mask)

    output = layers.scatter_nd_add(ref_in, coord, updates)
    if ref_dtype not in PaddleVarType.floats:
        output = layers.cast(output, dtype=ref_dtype)

    if in_place:
        layers.assign(output, ref)
        return ref
    else:
        return output
def forward(self, src_ids, *args, **kwargs):
    """Return vocabulary logits for every masked position of ``src_ids``.

    Runs the base ERNIE encoder, picks out the encodings at positions where
    the input token equals ``mask_id``, passes them through the MLM head
    (projection + layer norm), and projects onto the vocabulary using the
    tied word-embedding weights plus the MLM output bias.

    Returns:
        logits_2d: [n_masked, vocab_size] logits.
    """
    _, encoded = ErnieModel.forward(self, src_ids, *args, **kwargs)

    # Coordinates of every [MASK] token in the batch.
    mask_coords = L.where(src_ids == mask_id)
    feats = L.gather_nd(encoded, mask_coords)

    # MLM head: transform, normalize, then tied-embedding projection.
    feats = self.mlm_ln(self.mlm(feats))
    return L.matmul(feats, self.word_emb.weight, transpose_y=True) + self.mlm_bias
def seq_gather(seq, idxs):
    """Pick one vector per sequence.

    ``seq`` is [None, seq_len, s_size] and ``idxs`` is [None, 1]; from the
    i-th sequence the idxs[i]-th vector is selected, producing a
    [None, s_size] result.
    """
    idxs = layers.cast(idxs, dtype="int32")

    # Build (row, position) coordinate pairs for gather_nd.
    row_ids = layers.arange(0, seq.shape[0], dtype="int32")
    row_ids = layers.unsqueeze(row_ids, 1)
    coords = layers.concat([row_ids, idxs], 1)

    return layers.gather_nd(seq, coords)
def _calc_logits(self, enc_out, tgt_idx=None, name=""):
    """Get the logits of generation task.
    The network may share weight with token embeddings.

    Args:
        enc_out: the output embeddings of Transformer, shape is
            [batch_size, max_seq_len, hidden_size]
        tgt_idx (optional): the indices of prediction tokens, shape is
            [num_predictions, 2]. When None, logits are produced for every
            position.
        name: prefix used to resolve the shared token-embedding variable.

    Returns:
        logits: the logits of prediction task, shape is
            [num_predictions, vocab_size].

    Raises:
        ValueError: if ``tgt_idx`` has an unsupported shape.
    """
    if tgt_idx is None:
        # Predict at every position: flatten [B, T, H] -> [B*T, H].
        seq_feat = layers.reshape(x=enc_out, shape=[-1, self.hidden_size])
    elif len(tgt_idx.shape) == 2 and tgt_idx.shape[1] == 2:
        # [num_predictions, 2] rows are (batch_row, position) pairs.
        seq_feat = layers.gather_nd(input=enc_out, index=tgt_idx)
    else:
        raise ValueError(f"Invalid indices shape {tgt_idx.shape} is used")

    # Transform to embedding size before the vocabulary projection.
    seq_trans_feat = layers.fc(
        input=seq_feat,
        size=self.emb_size,
        act=self.hidden_act,
        param_attr=fluid.ParamAttr(
            name="mask_lm_trans_fc.w_0", initializer=self.param_initializer),
        bias_attr="mask_lm_trans_fc.b_0")
    seq_trans_feat = pre_process_layer(
        seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans")

    if self.weight_sharing:
        # Tie the output projection to the token embedding table.
        logits = layers.matmul(
            x=seq_trans_feat,
            y=fluid.default_main_program().global_block().var(
                name + self.token_emb_name),
            transpose_y=True)
        if self.cls_bias:
            logits += layers.create_parameter(
                shape=[self.vocab_size],
                dtype=self.dtype,
                attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"),
                is_bias=True)
    else:
        seq_out_bias_attr = "mask_lm_out_fc.b_0" if self.cls_bias else False
        logits = layers.fc(
            input=seq_trans_feat,
            size=self.vocab_size,
            param_attr=fluid.ParamAttr(
                name="mask_lm_out_fc.w_0", initializer=self.param_initializer),
            bias_attr=seq_out_bias_attr)
    return logits
def _calc_bow_logits(self, enc_out, bow_idx):
    """Get the logits of BoW task.
    The network may share weight with token embeddings.

    Args:
        enc_out: the output embeddings of Transformer, shape is
            [batch_size, max_seq_len, hidden_dim]
        bow_idx: the indices of prediction tokens, shape is
            [num_predictions, 1] or [num_predictions, 2].

    Returns:
        logits: the logits of prediction task, shape is
            [num_predictions, vocab_size].

    Raises:
        ValueError: if ``bow_idx`` has an unsupported shape.
    """
    if len(bow_idx.shape) == 2 and bow_idx.shape[1] == 1:
        # NOTE(review): squeezing axis 1 of [batch, seq, hidden] is only valid
        # when seq_len == 1 — confirm callers pass single-step enc_out here.
        enc_out = layers.squeeze(enc_out, [1])
        bow_feat = layers.gather(input=enc_out, index=bow_idx, overwrite=False)
    elif len(bow_idx.shape) == 2 and bow_idx.shape[1] == 2:
        # [num_predictions, 2] rows are (batch_row, position) pairs.
        bow_feat = layers.gather_nd(input=enc_out, index=bow_idx)
    else:
        raise ValueError(f"Invalid indices shape {bow_idx.shape} is used")

    # Transform to embedding size before the vocabulary projection.
    bow_trans_feat = layers.fc(
        input=bow_feat,
        size=self.emb_size,
        act=self.hidden_act,
        param_attr=fluid.ParamAttr(
            name="bow_trans_fc.w_0", initializer=self.param_initializer),
        bias_attr="bow_trans_fc.b_0")
    bow_trans_feat = pre_process_layer(
        bow_trans_feat, self.post_cls_cmd, name="bow_trans")

    if self.weight_sharing:
        # Tie the output projection to the token embedding table.
        bow_logits = layers.matmul(
            x=bow_trans_feat,
            y=fluid.default_main_program().global_block().var(
                self.token_emb_name),
            transpose_y=True)
        if self.cls_bias:
            bow_logits += layers.create_parameter(
                shape=[self.vocab_size],
                dtype=self.dtype,
                attr=fluid.ParamAttr(name="bow_out_fc.b_0"),
                is_bias=True)
    else:
        bow_out_bias_attr = "bow_out_fc.b_0" if self.cls_bias else False
        bow_logits = layers.fc(input=bow_trans_feat,
                               size=self.vocab_size,
                               param_attr=fluid.ParamAttr(
                                   name="bow_out_fc.w_0",
                                   initializer=self.param_initializer),
                               bias_attr=bow_out_bias_attr)
    return bow_logits
def _birnn_encoder(self, inputs, input_len, name_lens, name_pos, name_tok_len):
    """Encode token sequences with the (bi)RNN and pool one vector per name.

    Args:
        inputs (Variable): shape=[batch_size, max_seq_len, hidden_size]
        input_len (Variable): shape=[batch_size]
        name_lens (Variable): shape=[batch_size], number of names per example
        name_pos (Variable): shape=[batch_size, max_name_len, max_tokens],
            token positions of each name; [:, :, 0] is each name's first token
        name_tok_len (Variable): shape=[batch_size, max_name_len], token count
            of each name

    Returns:
        (name_repr, None): name_repr has shape
        [batch_size, max_name_len, hidden] where hidden is 2*hidden_size when
        bidirectional, otherwise hidden_size. Padded name slots are zeroed by
        the mask. The second element is always None.

    Raises:
        NULL
    """
    rnn_output, rnn_final_state = self._rnn_encoder.forward(
        inputs, input_len)

    max_name_len = name_pos.shape[1]
    # First token position of each name; mask marks real (non-padded) names.
    name_begin = name_pos[:, :, 0]

    name_repr_mask = layers.sequence_mask(name_lens, max_name_len,
                                          dtype=name_tok_len.dtype)
    # Last token position = begin + (len - 1), with padded slots masked to 0.
    len_delta = layers.elementwise_mul(name_tok_len - 1, name_repr_mask, axis=0)
    name_end = name_begin + len_delta

    if self._bidirectional:
        # Forward direction read at the name's end, backward direction at its
        # begin — each half sees the whole name.
        name_fwd_repr_gathered = nn_utils.batch_gather_2d(
            rnn_output, name_end)[:, :, :self._hidden_size]
        name_bwd_repr_gathered = nn_utils.batch_gather_2d(
            rnn_output, name_begin)[:, :, self._hidden_size:]
        name_repr_gathered = layers.concat(
            input=[name_fwd_repr_gathered, name_bwd_repr_gathered], axis=-1)
        new_hidden_size = self._hidden_size * 2
    else:
        name_repr_gathered = layers.gather_nd(rnn_output, name_end)
        new_hidden_size = self._hidden_size

    name_repr_tmp = layers.reshape(
        name_repr_gathered, shape=[-1, max_name_len, new_hidden_size])
    # Zero out representations of padded name slots.
    name_repr_mask = layers.cast(name_repr_mask, dtype=name_repr_tmp.dtype)
    name_repr = layers.elementwise_mul(name_repr_tmp, name_repr_mask, axis=0)

    return name_repr, None
def matrix_nms(bboxes, scores, score_threshold, post_threshold, nms_top_k, keep_top_k, use_gaussian=False, gaussian_sigma=2.):
    """Matrix NMS: decay scores of overlapping detections, then re-filter.

    Args:
        bboxes: candidate boxes, indexed by the rows of ``scores``.
        scores: per-class scores; transposed to [num_boxes, num_classes] below.
        score_threshold: pre-NMS score cutoff.
        post_threshold: score cutoff applied after Matrix-NMS decay.
        nms_top_k: max candidates entering Matrix NMS (<=0 disables the cap).
        keep_top_k: max detections returned (<=0 disables the cap).
        use_gaussian: select the 'gaussian' decay kernel instead of 'linear'.
        gaussian_sigma: sigma of the gaussian kernel.

    Returns:
        [N, 6] tensor of [label, score, x0, y0, x1, y1] rows, or a (0, 6)
        all ``-1`` tensor when nothing survives.
    """
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS: decays scores instead of hard-suppressing boxes.
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores, kernel=kernel, sigma=gaussian_sigma)

    # filter out detections whose decayed score fell below post_threshold.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    # BUGFIX: guard keep_top_k > 0 (as no_nms and the nms_top_k branch above
    # do); without it a non-positive keep_top_k would negative-slice and
    # silently drop the lowest-scored detections.
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Assemble [label, score, box] rows.
    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
def batch_gather(var, indices):
    """Gather one element from each batch row of ``var``.

    Only 2-D gathering is supported: for every row i the element at column
    indices[i] is selected.

    Args:
        var (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1] or [batch_size]

    Returns:
        Variable with shape [batch_size]

    Raises:
        ValueError: if ``indices`` is 2-D (or higher) with a trailing
            dimension other than 1.

    Examples:
        var
            [[1, 2, 3],
             [4, 5, 6]]
        indices
            [[2], [1]]

        return
            [[3], [5]]
    """
    if len(indices.shape) >= 2 and indices.shape[-1] != 1:
        raise ValueError(
            'shape of indices error. it should be a 1-D layers, or a 2-D layers which '
            'the 2nd dimension is 1. but got shape = %s' % (str(indices.shape), ))

    # Normalize indices to a column vector.
    if len(indices.shape) == 1:
        indices = layers.reshape(indices, shape=[-1, 1])

    # A 1-D var is temporarily promoted to a column so gather_nd applies.
    flatten_back = len(var.shape) == 1
    if flatten_back:
        var = PaddleFluidWrapper.reshape(var, shape=[-1, 1])

    idx_dtype = indices.dtype
    row_count = layers.cast(layers.shape(indices)[0], dtype=idx_dtype)
    start = layers.fill_constant(shape=[1], dtype=idx_dtype, value=0)
    step = layers.fill_constant(shape=[1], dtype=idx_dtype, value=1)

    # Pair each row number with its target column: coord[i] = [i, indices[i]].
    row_ids = layers.unsqueeze(
        layers.range(start, row_count, step, dtype=idx_dtype), [1])
    coord = layers.concat([row_ids, indices], axis=1)
    coord.stop_gradient = True

    result = layers.gather_nd(var, coord)
    if flatten_back:
        result = PaddleFluidWrapper.reshape(result, shape=[-1])
    return result
def index_sample(x, index):
    """Select input value according to index

    Arags:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    # Leading (batch) dims of x and index must agree.
    assert x_s[:dim] == index.shape[:dim]

    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(index.shape[0], index.shape[1], 1))

    # generate arange index, shape like index
    # arr_index = layers.arange(start=0, end=layers.cast(layers.shape(x)[0], ), dtype=index.dtype)
    batch_size = layers.cast(layers.shape(index)[0], dtype=index.dtype)
    zero = layers.fill_constant(shape=[1], dtype=index.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=index.dtype, value=1)
    arr_index = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=index.dtype), [1, 2])
    arr_index = layers.expand_as(arr_index, index)

    # generate new (batch_row, column) coordinate pairs for gather_nd
    new_index = layers.concat([arr_index, index], -1)
    new_index = layers.reshape(new_index, (-1, 2))

    # get output
    out = layers.gather_nd(r_x, new_index)
    # NOTE(review): this reshape to (-1, last_dim * 2) does not match the
    # docstring example's (batch, num_index) output, and differs from the
    # sibling index_sample variant which reshapes to (*x_s[:dim], -1) —
    # confirm the intended output layout against callers.
    out = layers.reshape(out, (-1, x_s[-1] * 2))
    return out
def batch_gather_2d(var, indices):
    """Gather several positions from each batch row of ``var``.

    For every row i, the positions listed in indices[i] are selected from
    var[i]. Only 2-D index tensors are supported.

    Args:
        var (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, max_len]

    Returns:
        Variable with shape [batch_size, max_len, var.shape[-1]]

    Raises:
        ValueError: if ``indices`` is not 2-D.

    Examples:
        var
            [[1, 2, 3],
             [4, 5, 6]]
        indices
            [[2, 0], [1, 2]]

        return
            [[3, 1], [5, 6]]
    """
    if len(indices.shape) != 2:
        raise ValueError('shape of indices error. it should be a 2-D layers. '
                         'but got shape = %s' % (str(indices.shape), ))

    idx_dtype = indices.dtype
    batch_size = layers.shape(indices)[0]

    start = layers.fill_constant(shape=[1], dtype=idx_dtype, value=0)
    step = layers.fill_constant(shape=[1], dtype=idx_dtype, value=1)
    stop = layers.cast(batch_size, dtype=idx_dtype)

    # Column of row numbers, broadcast across the index width.
    row_ids = layers.unsqueeze(
        layers.range(start, stop, step, dtype=idx_dtype), [1])
    width = indices.shape[1]
    row_ids = layers.expand(row_ids, [1, width])

    # Stack (row, position) pairs, flatten, gather, and restore the layout.
    coords = layers.concat(
        [layers.unsqueeze(row_ids, [2]),
         layers.unsqueeze(indices, [2])],
        axis=2)
    coords.stop_gradient = True
    flat_coords = layers.reshape(coords, shape=[-1, 2])

    gathered = layers.gather_nd(var, flat_coords)
    return layers.reshape(gathered, [batch_size, width, var.shape[-1]])
def forward(self, *args, **kwargs):
    """
    Args:
        tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]):
            ground trouth target sequence id (hard label) or distribution (soft label)
        tgt_pos(`Variable` of shape [n_targets, 2]):
            index of tgt_labels in `src_ids`, can be obtained from
            `fluid.layers.where(src_ids==mask_id)`
        encoder_only(Bool):
            if set, will not return loss, logits_2d
    Returns:
        loss(`Variable` of shape []):
            cross entropy loss mean over every target label. if `encode_only`, returns None.
        logits(`Variable` of shape [n_targets, vocab_size]):
            logits for every targets. if `encode_only`, returns None.
        info(Dictionary): see `ErnieModel`
    """
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)

    _, encoded, info = ErnieModel.forward(self, *args, **kwargs)

    if encode_only:
        return None, None, info
    elif tgt_labels is None or tgt_pos is None:
        # Inference: project every position to the vocabulary (weights tied to
        # the word embedding) and take the argmax token ids.
        encoded = self.mlm(encoded)
        encoded = self.mlm_ln(encoded)
        logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        output_ids = L.argmax(logits, -1)
        return output_ids, logits, info
    else:
        # Training: score only the target positions.
        encoded_2d = L.gather_nd(encoded, tgt_pos)
        encoded_2d = self.mlm(encoded_2d)
        encoded_2d = self.mlm_ln(encoded_2d)
        logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        if len(tgt_labels.shape) == 1:
            tgt_labels = L.reshape(tgt_labels, [-1, 1])

        # A trailing dim != 1 means the labels are soft distributions.
        loss = L.reduce_mean(
            L.softmax_with_cross_entropy(
                logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1)))
        return loss, logits_2d, info
def index_sample(x, index):
    """Select input value according to index

    Arags:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    # Leading (batch) dims of x and index must agree.
    assert x_s[:dim] == index.shape[:dim]

    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(len(r_x), -1, 1))

    # generate arange index, shape like index
    arr_index = layers.arange(start=0, end=len(index), dtype=index.dtype)
    arr_index = layers.unsqueeze(arr_index, axes=[1, 2])
    arr_index = layers.expand_as(arr_index, index)

    # generate new (batch_row, column) coordinate pairs for gather_nd
    new_index = layers.concat((arr_index, index), -1)
    new_index = layers.reshape(new_index, (-1, 2))

    # get output and restore the batch dims of the input
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (*x_s[:dim], -1))
    return out
def build_and_run_program(place, batch_size, beam_size, stop_gradient=False):
    """Build a While-loop program that repeatedly gathers beam scores, then run
    one Adam step and return the loss.

    Args:
        place: fluid place (CPU/GPU) to execute on.
        batch_size: leading dim of the random input tensor.
        beam_size: beam dim of the input and of the fed ``indices``.
        stop_gradient: propagated to the gather coordinates inside the loop,
            to exercise both gradient paths.

    Returns:
        The scalar loss value from the single executor run.
    """
    # Fix seeds so the run is reproducible.
    fluid.default_startup_program().random_seed = 1
    fluid.default_main_program().random_seed = 1
    np.random.seed(2)

    x = layers.assign(
        np.random.rand(batch_size, beam_size, 32).astype("float32"))
    indices = fluid.data(shape=[None, beam_size], dtype="int64", name="indices")
    step_idx = layers.fill_constant(
        shape=[1], dtype="int64", value=0, force_cpu=True)
    max_len = layers.fill_constant(
        shape=[1], dtype="int64", value=10, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    scores = layers.array_write(x, step_idx)
    with while_op.block():
        bs = layers.cast(layers.shape(x)[0], "int64")
        # Repeat the cast to stress repeated-op handling inside the loop body.
        for _ in range(20):
            bs = layers.cast(bs, 'int64')
        bs.stop_gradient = stop_gradient

        # (row, index) coordinate pairs for gather_nd over the beam dim.
        batch_pos = layers.expand(
            layers.unsqueeze(
                layers.range(
                    0, bs, 1, dtype=bs.dtype), [1]), [1, beam_size])
        topk_coordinates = layers.stack([batch_pos, indices], axis=2)
        topk_coordinates.stop_gradient = stop_gradient
        score = layers.gather_nd(x, topk_coordinates)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(score, i=step_idx, array=scores)

        # Loop condition: advance until step_idx reaches max_len.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        layers.assign(length_cond, cond)

    out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0]
    loss = layers.reduce_mean(out)
    opt = fluid.optimizer.Adam(0.01)
    opt.minimize(loss)
    exe = fluid.Executor(place)
    data = np.random.random_integers(
        low=0, high=beam_size - 1, size=(batch_size, beam_size)).astype("int64")
    loss_val, = exe.run(feed={"indices": data}, fetch_list=[loss])

    return loss_val
def forward(self, indices, speaker_position_rate=None):
    """
    Args:
        indices (Variable): shape (B, T), dtype: int64, position
            indices, where B means the batch size, T means the time steps.
        speaker_position_rate (Variable | float, optional), position
            rate. It can be a float point number or a Variable with
            shape (1,), then this speaker_position_rate is used for every
            example. It can also be a Variable with shape (B, ), which
            contains a speaker position rate for each utterance.
    Returns:
        out (Variable): shape(B, T, C_pos), dtype float32, position
            embedding, where C_pos means position embedding size.
    """
    batch_size, time_steps = indices.shape

    # convert speaker_position_rate to a Variable with shape(B, )
    if isinstance(speaker_position_rate, float):
        speaker_position_rate = dg.to_variable(
            np.array([speaker_position_rate]).astype("float32"))
        speaker_position_rate = F.expand(speaker_position_rate,
                                         [batch_size])
    elif isinstance(speaker_position_rate, fluid.framework.Variable) \
        and list(speaker_position_rate.shape) == [1]:
        speaker_position_rate = F.expand(speaker_position_rate,
                                         [batch_size])
    assert len(speaker_position_rate.shape) == 1 and \
        list(speaker_position_rate.shape) == [batch_size]

    # Per-utterance position-scaled embedding table, shape (B, V, C).
    weight = compute_position_embedding(self.weight,
                                        speaker_position_rate)  # (B, V, C)

    # make indices for gather_nd: pair each position index with its batch row.
    batch_id = F.expand(
        F.unsqueeze(
            F.range(
                0, batch_size, 1, dtype="int64"), [1]), [1, time_steps])
    # (B, T, 2)
    gather_nd_id = F.stack([batch_id, indices], -1)

    out = F.gather_nd(weight, gather_nd_id)
    return out
def forward(self, *args, **kwargs):
    """
    Args:
        nsp_labels (optional, `Variable` of shape [batch_size]):
            labels for `next sentence prediction` tasks
        mlm_pos (optional, `Variable` of shape [n_mask, 2]):
            index of mask_id in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
        labels (optional, `Variable` of shape [n_mask]):
            labels for `mask language model` tasks, the original token indices in masked position in `src_ids`
    Returns:
        loss (`Variable` of shape []):
            total_loss of `next sentence prediction` and `masked language model`
        mlm_loss (`Variable` of shape []):
            loss for `masked language model` task
        nsp_loss (`Variable` of shape []):
            loss for `next sentence prediction` task
    """
    mlm_labels = kwargs.pop('labels')
    mlm_pos = kwargs.pop('mlm_pos')
    nsp_labels = kwargs.pop('nsp_labels')

    pooled, encoded = super(ErnieModelForPretraining, self).forward(
        *args, **kwargs)

    # softmax_with_cross_entropy expects 2-D label tensors.
    if len(mlm_labels.shape) == 1:
        mlm_labels = L.reshape(mlm_labels, [-1, 1])
    if len(nsp_labels.shape) == 1:
        nsp_labels = L.reshape(nsp_labels, [-1, 1])

    # NSP head scores the pooled representation.
    nsp_loss = self.pooler_heads[0](pooled, nsp_labels)

    # MLM head scores only the masked positions, with the output projection
    # tied to the word embedding table.
    encoded_2d = L.gather_nd(encoded, mlm_pos)
    encoded_2d = self.mlm(encoded_2d)
    encoded_2d = self.mlm_ln(encoded_2d)
    logits_2d = L.matmul(
        encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
    mlm_loss = L.reduce_mean(
        L.softmax_with_cross_entropy(logits_2d, mlm_labels))

    total_loss = mlm_loss + nsp_loss
    return total_loss, mlm_loss, nsp_loss
def net(self, inputs, is_infer=False):
    """Build the SR-GNN style session-graph recommendation network.

    Args:
        inputs (list[Variable]): positional model inputs. From usage below:
            inputs[0] item ids fed to the embedding table,
            inputs[1]/inputs[2] gather_nd indices for the sequence and the
            last click, inputs[3]/inputs[4] in/out adjacency matrices,
            inputs[5] attention mask, inputs[6] labels.
            (layout inferred from usage — TODO confirm against the reader)
        is_infer (bool): selects evaluate vs. train batch size and whether
            results are written to `_infer_results` or `_metrics`.
    """
    if is_infer:
        bs = self.evaluate_batch_size
    else:
        bs = self.train_batch_size

    stdv = 1.0 / math.sqrt(self.hidden_size)

    def embedding_layer(input,
                        table_name,
                        emb_dim,
                        initializer_instance=None):
        # Embedding lookup; the "emb" table is shared with the
        # all-vocabulary scoring lookup further below (same param name).
        emb = fluid.embedding(
            input=input,
            size=[self.dict_size, emb_dim],
            param_attr=fluid.ParamAttr(
                name=table_name, initializer=initializer_instance))
        return emb

    sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
    items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
                                sparse_initializer)
    pre_state = items_emb
    # Gated GNN: propagate node states along in/out adjacency for
    # `self.step` iterations. fc `name=` reuse shares weights across steps.
    for i in range(self.step):
        pre_state = layers.reshape(
            x=pre_state, shape=[bs, -1, self.hidden_size])
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=self.hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=self.hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

        state_adj_in = layers.matmul(inputs[3],
                                     state_in)  # [batch_size, uniq_max, h]
        state_adj_out = layers.matmul(
            inputs[4], state_out)  # [batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(
            x=gru_input, shape=[-1, self.hidden_size * 2])
        gru_fc = layers.fc(input=gru_input,
                           name="gru_fc",
                           size=3 * self.hidden_size,
                           bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(
            input=gru_fc,
            hidden=layers.reshape(
                x=pre_state, shape=[-1, self.hidden_size]),
            size=3 * self.hidden_size)

    final_state = layers.reshape(
        pre_state, shape=[bs, -1, self.hidden_size])
    seq = layers.gather_nd(final_state, inputs[1])
    last = layers.gather_nd(final_state, inputs[2])

    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=self.hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
    last_fc = layers.fc(input=last,
                        name="last_fc",
                        size=self.hidden_size,
                        bias_attr=False,
                        act=None,
                        num_flatten_dims=1,
                        param_attr=fluid.ParamAttr(
                            initializer=fluid.initializer.Uniform(
                                low=-stdv, high=stdv)))  # [batch_size, h]

    # Additive attention: score every sequence position against the
    # last-click representation.
    seq_fc_t = layers.transpose(
        seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
    add = layers.elementwise_add(seq_fc_t,
                                 last_fc)  # [seq_max, batch_size, h]
    b = layers.create_parameter(
        shape=[self.hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
    add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

    add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
    add_sigmoid = layers.transpose(
        add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

    weight = layers.fc(
        input=add_sigmoid,
        name="weight_fc",
        size=1,
        act=None,
        num_flatten_dims=2,
        bias_attr=False,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
    weight *= inputs[5]  # zero out attention on padded positions
    weight_mask = layers.elementwise_mul(
        seq, weight, axis=0)  # [batch_size, seq_max, h]
    global_attention = layers.reduce_sum(
        weight_mask, dim=1)  # [batch_size, h]

    final_attention = layers.concat(
        [global_attention, last], axis=1)  # [batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="final_attention_fc",
        size=self.hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, h]

    # all_vocab = layers.create_global_var(
    #     shape=[items_num - 1],
    #     value=0,
    #     dtype="int64",
    #     persistable=True,
    #     name="all_vocab")
    # Candidate item ids 1..dict_size-1 (id 0 is presumably padding —
    # TODO confirm against the dataset reader).
    all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
    all_vocab = fluid.layers.cast(
        x=fluid.layers.assign(all_vocab), dtype='int64')

    all_emb = fluid.embedding(
        input=all_vocab,
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[self.dict_size, self.hidden_size])  # [all_vocab, h]

    # Score the session representation against every candidate item.
    logits = layers.matmul(
        x=final_attention_fc, y=all_emb,
        transpose_y=True)  # [batch_size, all_vocab]
    softmax = layers.softmax_with_cross_entropy(
        logits=logits, label=inputs[6])  # [batch_size, 1]
    self.loss = layers.reduce_mean(softmax)  # [1]
    self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)

    self._cost = self.loss
    if is_infer:
        self._infer_results['acc'] = self.acc
        self._infer_results['loss'] = self.loss
        return

    self._metrics["LOSS"] = self.loss
    self._metrics["train_acc"] = self.acc
def network(items_num, hidden_size, step, bs):
    """Build the static-graph SR-GNN network and its data pipeline.

    Args:
        items_num (int): vocabulary size of the item embedding table.
        hidden_size (int): hidden dimension h.
        step (int): number of gated-GNN propagation iterations.
        bs (int): batch size baked into the input shapes.

    Returns:
        tuple: (loss, acc, py_reader, feed_datas, logits).
    """
    stdv = 1.0 / math.sqrt(hidden_size)

    items = fluid.data(
        name="items", shape=[bs, -1],
        dtype="int64")  # [batch_size, uniq_max]
    seq_index = fluid.data(
        name="seq_index", shape=[bs, -1, 2],
        dtype="int32")  # [batch_size, seq_max, 2]
    last_index = fluid.data(
        name="last_index", shape=[bs, 2], dtype="int32")  # [batch_size, 2]
    adj_in = fluid.data(
        name="adj_in", shape=[bs, -1, -1],
        dtype="float32")  # [batch_size, seq_max, seq_max]
    adj_out = fluid.data(
        name="adj_out", shape=[bs, -1, -1],
        dtype="float32")  # [batch_size, seq_max, seq_max]
    mask = fluid.data(
        name="mask", shape=[bs, -1, 1],
        dtype="float32")  # [batch_size, seq_max, 1]
    label = fluid.data(
        name="label", shape=[bs, 1], dtype="int64")  # [batch_size, 1]

    datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
    py_reader = fluid.io.DataLoader.from_generator(
        capacity=256, feed_list=datas, iterable=False)
    feed_datas = datas

    items_emb = fluid.embedding(
        input=items,
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  # [batch_size, uniq_max, h]

    pre_state = items_emb
    # Gated GNN: propagate node states along in/out adjacency for `step`
    # iterations; fc `name=` reuse shares the weights across iterations.
    for i in range(step):
        pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv,
                                                      high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv,
                                                      high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

        state_adj_in = layers.matmul(adj_in,
                                     state_in)  # [batch_size, uniq_max, h]
        state_adj_out = layers.matmul(
            adj_out, state_out)  # [batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
        gru_fc = layers.fc(input=gru_input,
                           name="gru_fc",
                           size=3 * hidden_size,
                           bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(
            input=gru_fc,
            hidden=layers.reshape(
                x=pre_state, shape=[-1, hidden_size]),
            size=3 * hidden_size)

    final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
    seq = layers.gather_nd(final_state, seq_index)
    last = layers.gather_nd(final_state, last_index)

    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
    last_fc = layers.fc(
        input=last,
        name="last_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=1,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, h]

    # Additive attention of every position against the last click.
    seq_fc_t = layers.transpose(
        seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
    add = layers.elementwise_add(seq_fc_t,
                                 last_fc)  # [seq_max, batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
    add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

    add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
    add_sigmoid = layers.transpose(
        add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

    weight = layers.fc(
        input=add_sigmoid,
        name="weight_fc",
        size=1,
        act=None,
        num_flatten_dims=2,
        bias_attr=False,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
    weight *= mask  # zero out attention on padded positions
    weight_mask = layers.elementwise_mul(
        seq, weight, axis=0)  # [batch_size, seq_max, h]
    global_attention = layers.reduce_sum(
        weight_mask, dim=1)  # [batch_size, h]

    final_attention = layers.concat(
        [global_attention, last], axis=1)  # [batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="final_attention_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, h]

    # Persistable id list; presumably filled with 1..items_num-1 by the
    # training driver — TODO confirm where it is assigned.
    all_vocab = layers.create_global_var(
        shape=[items_num - 1],
        value=0,
        dtype="int64",
        persistable=True,
        name="all_vocab")

    all_emb = fluid.embedding(
        input=all_vocab,
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  # [all_vocab, h]

    # Score the session representation against every candidate item.
    logits = layers.matmul(
        x=final_attention_fc, y=all_emb,
        transpose_y=True)  # [batch_size, all_vocab]
    softmax = layers.softmax_with_cross_entropy(
        logits=logits, label=label)  # [batch_size, 1]
    loss = layers.reduce_mean(softmax)  # [1]
    acc = layers.accuracy(input=logits, label=label, k=50)
    return loss, acc, py_reader, feed_datas, logits
def to_2d(t_3d):
    """Collapse a 3-D tensor to 2-D by gathering the rows selected by
    the enclosing scope's `pad_idx` index tensor."""
    return L.gather_nd(t_3d, pad_idx)
def gather(x, indices, batch_pos):
    """Gather `x[b, indices[b, w]]` for every (batch, beam) pair.

    `batch_pos` and `indices` are stacked into (batch, index) coordinate
    pairs along a new trailing axis, then looked up with gather_nd.
    """
    coords = fluid.layers.stack([batch_pos, indices], axis=2)
    return layers.gather_nd(x, coords)
def __call__(self, kernel_preds, cls_preds, mask_protos,
             batch_gt_objs_tensors, batch_gt_clss_tensors,
             batch_gt_masks_tensors, batch_gt_pos_idx_tensors):
    '''
    SOLO-style loss: per (image, FPN level) dice mask loss + focal
    classification loss, normalized by the number of positive samples.

    :param kernel_preds: per level [N, 256, seg_num_grid, seg_num_grid],
        predicted conv kernel for each grid cell, ordered from small to
        large receptive field.
    :param cls_preds: per level [N, 80, seg_num_grid, seg_num_grid],
        per-cell class scores, NOT yet sigmoid-activated; small to large
        receptive field.
    :param mask_protos: [bs, 256, s4, s4] mask prototypes.
    :param batch_gt_objs_tensors: per level [N, seg_num_grid, seg_num_grid, 1],
        objectness of each cell; small to large receptive field.
    :param batch_gt_clss_tensors: per level [N, seg_num_grid, seg_num_grid, 80],
        one-hot ground-truth class of each cell; small to large receptive field.
    :param batch_gt_masks_tensors: per level [N, -1, s4, s4], ground-truth
        masks; small to large receptive field.
    :param batch_gt_pos_idx_tensors: per level [N, -1, 3], indices of
        positive samples; small to large receptive field.
    :return: dict {"loss_masks": ..., "loss_clss": ...}
    '''
    batch_size = self.batch_size
    num_layers = len(kernel_preds)

    # ================= compute the losses =================
    num_ins = 0.  # running count of positive samples in this batch
    loss_clss, loss_masks = [], []
    for bid in range(batch_size):
        for lid in range(num_layers):
            # ================ mask loss ======================
            mask_proto = mask_protos[bid]  # [256, s4, s4] prototypes of this image
            kernel_pred = kernel_preds[lid][
                bid]  # [256, seg_num_grid, seg_num_grid] predicted kernels ("mask coefficients" in YOLACT terms)
            kernel_pred = L.transpose(
                kernel_pred, perm=[1, 2, 0]
            )  # [seg_num_grid, seg_num_grid, 256]

            gt_objs = batch_gt_objs_tensors[lid][
                bid]  # [seg_num_grid, seg_num_grid, 1]
            gt_masks = batch_gt_masks_tensors[lid][bid]  # [-1, s4, s4]
            pmidx = batch_gt_pos_idx_tensors[lid][bid]  # [-1, 3]
            gt_objs.stop_gradient = True
            gt_masks.stop_gradient = True
            pmidx.stop_gradient = True

            # Padding rows hold -1 in all three columns (sum == -3),
            # so keeping rows whose sum > -1 drops the padding.
            idx_sum = L.reduce_sum(pmidx, dim=1)
            keep = L.where(idx_sum > -1)
            keep = L.reshape(keep, (-1, ))
            keep.stop_gradient = True

            pmidx = L.gather(pmidx, keep)  # [M, 3]
            yx_idx = pmidx[:, :2]  # [M, 2] grid (y, x) of each positive
            m_idx = pmidx[:, 2]  # [M, ] index into gt_masks
            yx_idx.stop_gradient = True
            m_idx.stop_gradient = True

            # Pull out the positives.
            gt_obj = L.gather_nd(gt_objs, yx_idx)  # [M, 1] whether a true positive
            pos_krn = L.gather_nd(kernel_pred,
                                  yx_idx)  # [M, 256] kernels (mask coefficients) of positives
            gt_mask = L.gather(gt_masks, m_idx)  # [M, s4, s4] ground-truth masks

            # Count positives for normalization later.
            num_ins += L.reduce_sum(gt_obj)

            # Assemble the predicted masks: prototypes x kernels.
            mask_proto = L.transpose(mask_proto,
                                     perm=[1, 2, 0])  # [s4, s4, 256]
            masks = L.matmul(
                mask_proto, pos_krn, transpose_y=True)  # [s4, s4, M]
            masks = L.sigmoid(masks)  # [s4, s4, M]
            masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]
            loss_mask = self.dice_loss(masks, gt_mask, gt_obj)
            loss_masks.append(loss_mask)

            # ================ classification loss: sigmoid_focal_loss() ======================
            gamma = self.loss_gamma
            alpha = self.loss_alpha
            pred_conf = cls_preds[lid][
                bid]  # [80, seg_num_grid, seg_num_grid] pre-sigmoid scores
            pred_conf = L.transpose(pred_conf, perm=[
                1, 2, 0
            ])  # [seg_num_grid, seg_num_grid, 80] pre-sigmoid
            pred_conf = L.sigmoid(
                pred_conf
            )  # [seg_num_grid, seg_num_grid, 80] sigmoid-activated
            gt_clss = batch_gt_clss_tensors[lid][
                bid]  # [seg_num_grid, seg_num_grid, 80] one-hot ground truth
            gt_clss.stop_gradient = True
            # Focal loss terms; 1e-9 guards the log against zero input.
            pos_loss = gt_clss * (0 - L.log(pred_conf + 1e-9)) * L.pow(
                1 - pred_conf, gamma) * alpha
            neg_loss = (
                1.0 - gt_clss) * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(
                    pred_conf, gamma) * (1 - alpha)
            focal_loss = pos_loss + neg_loss
            focal_loss = L.reduce_sum(focal_loss, dim=[0, 1])
            loss_clss.append(focal_loss)

    # Normalize both losses by the positive count (at least 1).
    loss_masks = L.concat(loss_masks, axis=0)
    loss_masks = L.reduce_sum(loss_masks) * self.ins_loss_weight
    loss_masks = loss_masks / L.elementwise_max(
        L.ones((1, ), dtype='float32'), num_ins)

    loss_clss = L.concat(loss_clss, axis=0)
    loss_clss = L.reduce_sum(loss_clss) * self.clss_loss_weight
    loss_clss = loss_clss / L.elementwise_max(
        L.ones((1, ), dtype='float32'), num_ins)

    loss_all = {"loss_masks": loss_masks, "loss_clss": loss_clss}
    return loss_all
def __compute_graph_bias(q, graph_attn_mask, pos_win): """ :param q: (batch_size, n_heads, query_len, dim_per_head) :param graph_attn_mask: (batch_size, n_head, key_s_len, key_s_len) :param pos_win: :return: """ # (batch_size, n_heads, query_len, dim_per_head) pos_v = layers.fc(input=q, size=d_value, num_flatten_dims=3, param_attr=fluid.ParamAttr( name=name + '_pos_fc.w_0', initializer=param_initializer), bias_attr=name + '_pos_fc.b_0') # (batch_size, n_heads, query_len, 1) pos_s = layers.fc(input=layers.tanh(pos_v), size=1, num_flatten_dims=3, param_attr=fluid.ParamAttr( name=name + '_pos_score_fc.w_0', initializer=param_initializer), bias_attr=False) # (batch_size, n_heads, query_len, 1) pos = layers.sigmoid(pos_s) * (key_s_len - 1) # (batch_size, n_heads, query_len, 1) pos_up = layers.cast(layers.ceil(pos), dtype='int64') # print("pos_up.shape = %s" % str(pos_up.shape)) pos_down = layers.cast(layers.floor(pos), dtype='int64') # print("pos_down.shape = %s" % str(pos_down.shape)) batch_ind = layers.range(0, layers.cast(batch_size, dtype='int64'), 1, 'int64') # print("batch_ind.shape = %s" % str(batch_ind.shape)) batch_ind = layers.unsqueeze(batch_ind, axes=[1, 2, 3]) # (batch_size, 1, 1, 1) batch_ind = layers.expand( batch_ind, expand_times=[1, n_head, query_len, 1]) # (batch_size, n_heads, query_len, 1) # print("batch_ind.shape = %s" % str(batch_ind.shape)) head_ind = layers.range(0, n_head, 1, 'int64') # print("head_ind.shape = %s" % str(head_ind.shape)) head_ind = layers.unsqueeze(head_ind, axes=[0, 2, 3]) # (1, n_heads, 1, 1) head_ind = layers.expand(head_ind, expand_times=[batch_size, 1, query_len, 1]) # print("head_ind.shape = %s" % str(head_ind.shape)) query_ind = layers.range(0, layers.cast(query_len, dtype='int64'), 1, 'int64') # print("query_ind.shape = %s" % str(query_ind.shape)) query_ind = layers.unsqueeze(query_ind, axes=[0, 1, 3]) # (1, 1, query_len, 1) query_ind = layers.expand(query_ind, expand_times=[batch_size, n_head, 1, 1]) # 
print("query_ind.shape = %s" % str(query_ind.shape)) # (batch_size, n_heads, query_len, 4) pos_up_ind = layers.concat( input=[batch_ind, head_ind, query_ind, pos_up], axis=-1) # print("pos_up_ind.shape = %s" % str(pos_up_ind.shape)) pos_up_ind.stop_gradient = True pos_down_ind = layers.concat( input=[batch_ind, head_ind, query_ind, pos_down], axis=-1) # print("pos_down_ind.shape = %s" % str(pos_down_ind.shape)) pos_down_ind.stop_gradient = True # (batch_size, n_heads, query_len, key_s_len, key_s_len) graph_attn_mask = layers.unsqueeze(graph_attn_mask, axes=[2]) # print("graph_attn_mask.shape = %s" % str(graph_attn_mask.shape)) graph_attn_mask = layers.expand(graph_attn_mask, expand_times=[1, 1, query_len, 1, 1]) # print("graph_attn_mask.shape = %s" % str(graph_attn_mask.shape)) # (batch_size, n_heads, query_len, key_s_len) graph_attn_mask_up = layers.gather_nd(input=graph_attn_mask, index=pos_up_ind) graph_attn_mask_down = layers.gather_nd(input=graph_attn_mask, index=pos_down_ind) # print("graph_attn_mask_up.shape = %s" % str(graph_attn_mask_up.shape)) # print("graph_attn_mask_down.shape = %s" % str(graph_attn_mask_down.shape)) # print("pos_up.shape = %s" % str(pos_up.shape)) # print("pos_down.shape = %s" % str(pos_down.shape)) # linearly combine up and down (batch_size, n_heads, query_len, key_s_len) graph_attn_mask_select = graph_attn_mask_up * (1.0 - (layers.cast(pos_up, dtype='float32') - pos)) + \ graph_attn_mask_down * (1.0 - (pos - layers.cast(pos_down, dtype='float32'))) # print("graph_attn_mask_select.shape = %s" % str(graph_attn_mask_select.shape)) # re-weight the attention score with gaussian weights gaussian_w = ( -0.5 * graph_attn_mask_select * graph_attn_mask_select) / ( (0.5 * pos_win)**2) # [batch, n_heads, query_len, key_s_len] # print("gaussian_w.shape = %s" % str(gaussian_w.shape)) return gaussian_w