def label_embed_input(self, feature):
    label = F.data(name="label", shape=[None, 1], dtype="int64")
    label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
    label = L.reshape(label, shape=[-1])
    label = L.gather(label, label_idx, overwrite=False)

    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    feature = L.layer_norm(feature,
                           name='layer_norm_feature_input1',
                           param_attr=lay_norm_attr,
                           bias_attr=lay_norm_bias)

    embed_attr = F.ParamAttr(
        initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
    embed = F.embedding(input=label,
                        size=(self.out_size, self.embed_size),
                        param_attr=embed_attr)

    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    embed = L.layer_norm(embed,
                         name='layer_norm_feature_input2',
                         param_attr=lay_norm_attr,
                         bias_attr=lay_norm_bias)
    embed = L.relu(embed)

    feature_label = L.gather(feature, label_idx, overwrite=False)
    feature_label = feature_label + embed
    feature = L.scatter(feature, label_idx, feature_label, overwrite=True)
    return feature

def _prepare_timestep_input(self, state, step_idx):
    model_input = {"gather_idx": state["parent_idx"]}

    # token ids
    pre_ids = layers.array_read(array=state["tgt_ids"], i=step_idx)
    model_input["token_ids"] = layers.unsqueeze(pre_ids, 1)

    # position ids
    pre_pos = layers.array_read(array=state["tgt_pos"], i=step_idx)
    model_input["pos_ids"] = layers.gather(pre_pos, state["parent_idx"])

    pre_scores = layers.array_read(array=state["scores"], i=step_idx)

    # generation_mask
    tgt_generation_mask = layers.array_read(state["tgt_generation_mask"], i=step_idx)
    append_mask = layers.fill_constant_batch_size_like(pre_ids, [-1, 1, 1], "float32", 1.0)
    tgt_generation_mask = layers.concat([tgt_generation_mask, append_mask], axis=2)
    model_input["generation_mask"] = pre_mask = layers.gather(tgt_generation_mask, state["parent_idx"])

    model_input["type_ids"] = layers.fill_constant_batch_size_like(pre_mask, [-1, 1, 1], "int64", 1)
    if self.use_role:
        model_input["role_ids"] = layers.fill_constant_batch_size_like(pre_mask, [-1, 1, 1], "int64", 0)

    return model_input, pre_ids, pre_scores

def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags,
                                beam_size, select_beam=None, generate_id=None):
    scores = layers.reshape(scores, shape=[1, -1])
    _, topk_indexs = layers.topk(scores, k=beam_size)
    topk_indexs = layers.reshape(topk_indexs, shape=[-1])

    # gather result
    top_seq = layers.gather(sequences, topk_indexs)
    topk_flags = layers.gather(flags, topk_indexs)
    topk_gather_scores = layers.gather(scores_to_gather, topk_indexs)

    if select_beam:
        topk_beam = layers.gather(select_beam, topk_indexs)
    else:
        topk_beam = select_beam

    if generate_id:
        topk_id = layers.gather(generate_id, topk_indexs)
    else:
        topk_id = generate_id

    return top_seq, topk_gather_scores, topk_flags, topk_beam, topk_id

def no_nms(bboxes, scores, score_threshold, keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)
    return pred

def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
    """
    Reshape input tensors at the last dimension to split multi-heads
    and then transpose. Specifically, transform the input tensor with shape
    [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
    with shape [bs, n_head, max_sequence_length, hidden_dim].
    """
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    reshaped_q = layers.reshape(x=queries, shape=[0, 0, n_head, d_key], inplace=True)
    # permute the dimensions into:
    # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
    q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
    # For encoder-decoder attention in inference, insert the ops and vars
    # into global block to use as cache among beam search.
    reshape_layer = wrap_layer_with_block(
        layers.reshape,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.reshape
    transpose_layer = wrap_layer_with_block(
        layers.transpose,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.transpose
    reshaped_k = reshape_layer(x=keys, shape=[0, 0, n_head, d_key], inplace=True)
    k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
    reshaped_v = reshape_layer(x=values, shape=[0, 0, n_head, d_value], inplace=True)
    v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

    if cache is not None:  # only for faster inference
        if static_kv:  # For encoder-decoder attention in inference
            cache_k, cache_v = cache["static_k"], cache["static_v"]
            # To init the static_k and static_v in cache.
            # Maybe we can use condition_op(if_else) to do these at the first
            # step in while loop to replace these, however it might be less
            # efficient.
            static_cache_init = wrap_layer_with_block(
                layers.assign,
                fluid.default_main_program().current_block().parent_idx)
            static_cache_init(k, cache_k)
            static_cache_init(v, cache_v)
        else:  # For decoder self-attention in inference
            cache_k, cache_v = cache["k"], cache["v"]
        # gather cell states corresponding to selected parent
        select_k = layers.gather(cache_k, index=gather_idx)
        select_v = layers.gather(cache_v, index=gather_idx)
        if not static_kv:
            # For self attention in inference, use cache and concat time steps.
            select_k = layers.concat([select_k, k], axis=2)
            select_v = layers.concat([select_v, v], axis=2)
        # update cell states(caches) cached in global block
        layers.assign(select_k, cache_k)
        layers.assign(select_v, cache_v)
        return q, select_k, select_v

    return q, k, v

def compute_topk_scores_and_seq(self, sequences, scores, scores_to_gather,
                                flags, pick_finish=False, cache=None):
    """compute_topk_scores_and_seq"""
    topk_scores, topk_indexes = layers.topk(scores, k=self.beam_size)  # [batch_size, beam_size]

    if not pick_finish:
        flat_topk_indexes = layers.reshape(topk_indexes, [-1]) + self.gather_topk_append_index
        flat_sequences = layers.reshape(sequences, [2 * self.batch_size * self.beam_size, -1])
    else:
        flat_topk_indexes = layers.reshape(topk_indexes, [-1]) + self.gather_finish_topk_append_index
        flat_sequences = layers.reshape(sequences, [3 * self.batch_size * self.beam_size, -1])

    topk_seq = layers.gather(flat_sequences, [flat_topk_indexes])
    topk_seq = layers.reshape(topk_seq, [self.batch_size, self.beam_size, -1])

    flat_flags = layers.reshape(flags, [-1])
    topk_flags = layers.gather(flat_flags, [flat_topk_indexes])
    topk_flags = layers.reshape(topk_flags, [-1, self.beam_size])

    flat_scores = layers.reshape(scores_to_gather, [-1])
    topk_gathered_scores = layers.gather(flat_scores, [flat_topk_indexes])
    topk_gathered_scores = layers.reshape(topk_gathered_scores, [-1, self.beam_size])

    if cache:
        self.gather_cache(cache, flat_topk_indexes)

    return topk_seq, topk_gathered_scores, topk_flags, cache

def __build_edges(self, edges, node_shift, edge_lod, edge_feats):
    """Merge subgraph edges."""
    if isinstance(edges, tuple):
        src, dst = edges
    else:
        src = edges[:, 0]
        dst = edges[:, 1]

    src = L.reshape(src, [-1])
    dst = L.reshape(dst, [-1])
    src = paddle_helper.ensure_dtype(src, dtype="int32")
    dst = paddle_helper.ensure_dtype(dst, dtype="int32")

    # preprocess edges
    lod_dst = L.lod_reset(dst, edge_lod)
    node_shift = L.reshape(node_shift, [-1, 1])
    node_shift = L.sequence_expand_as(node_shift, lod_dst)
    node_shift = L.reshape(node_shift, [-1])
    src = src + node_shift
    dst = dst + node_shift

    # sort edges
    self._edges_dst, index = L.argsort(dst)
    self._edges_src = L.gather(src, index, overwrite=False)

    # assign edge features
    if edge_feats is not None:
        for key, efeat in edge_feats.items():
            self.edge_feat_tensor_dict[key] = L.gather(efeat, index, overwrite=False)

def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data):
    """Perform nms for only the max scoring class that isn't background (class 0)"""
    # All boxes are decoded first and only then filtered by score; filtering
    # before decoding could be considered as an optimization.
    cur_scores = conf_preds[batch_idx, 1:, :]
    conf_scores = P.reduce_max(cur_scores, dim=0)

    '''
    The GPU build of paddlepaddle 1.6.2 has an issue: if keep is [None] and is then
    used inside gather(), the error
        cudaGetLastError invalid configuration argument errno: 9
    is raised, while the CPU build runs fine.
    To avoid this, keep must never be [None], so an extra element keep_extra is appended here.
    '''
    keep = P.where(conf_scores > self.conf_thresh)
    keep_extra = P.where(conf_scores < self.conf_thresh)
    keep_extra = keep_extra[:1]
    keep = P.concat([keep, keep_extra], axis=0)
    scores = P.gather(P.transpose(cur_scores, perm=[1, 0]), keep)
    scores = P.transpose(scores, perm=[1, 0])
    boxes = P.gather(decoded_boxes, keep)
    masks = P.gather(mask_data[batch_idx], keep)

    '''
    Because keep_extra is appended above, keep always holds at least one predicted box.
    Once the issue above is fixed upstream, remove the keep_extra code and uncomment
    the code below. This workaround exists because testing whether keep is empty is
    too hard to do here.
    '''
    # No boxes may survive the filter, so append a lowest-scoring box so that fast_nms() can proceed.
    # extra_box = P.fill_constant((1, 4), 'float32', value=-1.0)
    # extra_score = P.fill_constant((P.shape(cur_scores)[0], 1), 'float32', value=-1.0)
    # extra_mask = P.fill_constant((1, P.shape(mask_data)[2]), 'float32', value=-1.0)
    # boxes = P.concat([boxes, extra_box], axis=0)
    # scores = P.concat([scores, extra_score], axis=1)
    # masks = P.concat([masks, extra_mask], axis=0)

    return self.fast_nms(boxes, scores, masks)

def no_objs_2(boxes, classes, scores):
    keep = P.zeros((1, 1), 'int64')
    boxes = P.gather(boxes, keep)
    classes = P.gather(classes, keep)
    scores = P.gather(scores, keep)
    scores -= 2.0  # set a negative score on purpose so the Python side filters it out
    return boxes, classes, scores

def train_program(self):
    label = F.data(name="label", shape=[None, 1], dtype="int64")
    train_idx = F.data(name='train_idx', shape=[None], dtype="int64")
    prediction = L.gather(self.out_feat, train_idx, overwrite=False)
    label = L.gather(label, train_idx, overwrite=False)
    cost = L.softmax_with_cross_entropy(logits=prediction, label=label)
    avg_cost = L.mean(cost)
    self.avg_cost = avg_cost

def train_program(self):
    label = F.data(name="label", shape=[None, 112], dtype="int64")
    train_idx = F.data(name='train_idx', shape=[None], dtype="int64")
    prediction = L.gather(self.out_feat, train_idx, overwrite=False)
    label = L.gather(label, train_idx, overwrite=False)
    label = L.cast(label, dtype="float32")
    cost = L.sigmoid_cross_entropy_with_logits(x=prediction, label=label)
    avg_cost = L.mean(cost)
    self.avg_cost = avg_cost

def forward(self, is_test=False):
    """
    Build the network.
    """
    substruct_graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    context_graph_wrapper = GraphWrapper(
        name="context_graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    substruct_center_idx = layers.data(name="substruct_center_idx", shape=[-1, 1], dtype="int64")
    context_overlap_idx = layers.data(name="context_overlap_idx", shape=[-1, 1], dtype="int64")
    context_overlap_lod = layers.data(name="context_overlap_lod", shape=[1, -1], dtype="int32")
    context_cycle_index = layers.data(name="context_cycle_index", shape=[-1, 1], dtype="int64")

    substruct_node_repr = self.substruct_model.forward(substruct_graph_wrapper, is_test=is_test)
    substruct_repr = layers.gather(substruct_node_repr, substruct_center_idx)

    context_node_repr = self.context_model.forward(context_graph_wrapper, is_test=is_test)
    context_overlap_repr = layers.gather(context_node_repr, context_overlap_idx)
    context_repr = layers.sequence_pool(
        layers.lod_reset(context_overlap_repr, context_overlap_lod),
        self.context_pooling)
    neg_context_repr = layers.gather(context_repr, context_cycle_index)

    pred_pos = layers.reduce_sum(substruct_repr * context_repr, 1)
    pred_neg = layers.reduce_sum(substruct_repr * neg_context_repr, 1)
    label_pos = pred_pos * 0.0 + 1.0
    label_pos.stop_gradient = True
    label_neg = pred_neg * 0.0
    label_neg.stop_gradient = True

    loss = layers.sigmoid_cross_entropy_with_logits(x=pred_pos, label=label_pos) \
        + layers.sigmoid_cross_entropy_with_logits(x=pred_neg, label=label_neg)
    loss = layers.reduce_mean(loss)

    self.substruct_graph_wrapper = substruct_graph_wrapper
    self.context_graph_wrapper = context_graph_wrapper
    self.loss = loss

def exist_objs_2(keep, seg_masks, masks, sum_masks, scores, classes):
    keep = L.reshape(keep, (-1, ))         # [M2, ]
    keep.stop_gradient = True
    seg_masks = L.gather(seg_masks, keep)  # [M2, s4, s4]  binary masks of the M2 kept objects
    masks = L.gather(masks, keep)          # [M2, s4, s4]  mask probabilities of the M2 kept objects
    sum_masks = L.gather(sum_masks, keep)  # [M2, ]        mask areas of the M2 kept objects
    scores = L.gather(scores, keep)        # [M2, ]        scores of the M2 kept objects
    classes = L.gather(classes, keep)      # [M2, ]        class ids of the M2 kept objects
    return seg_masks, masks, sum_masks, scores, classes

def no_objs_2(seg_masks, masks, sum_masks, scores, classes):
    keep = L.zeros((1, ), np.int64)
    keep.stop_gradient = True
    seg_masks = L.gather(seg_masks, keep)   # [M2, s4, s4]  binary masks of the M2 kept objects
    masks = L.gather(masks, keep)           # [M2, s4, s4]  mask probabilities of the M2 kept objects
    sum_masks = L.gather(sum_masks, keep)   # [M2, ]        mask areas of the M2 kept objects
    scores = L.gather(scores, keep) - 99.0  # [M2, ]        scores; made negative so they are filtered out later
    classes = L.gather(classes, keep)       # [M2, ]        class ids of the M2 kept objects
    return seg_masks, masks, sum_masks, scores, classes

def body_func(step_idx, pre_ids, pre_scores, gather_idx, caches, trg_src_attn_bias):
    # gather cell states corresponding to selected parent
    pre_caches = map_structure(
        lambda x: layers.gather(x, index=gather_idx), caches)
    pre_src_attn_bias = layers.gather(trg_src_attn_bias, index=gather_idx)
    pre_pos = layers.elementwise_mul(
        x=layers.fill_constant_batch_size_like(
            input=pre_src_attn_bias,  # can't use lod tensor here
            value=1,
            shape=[-1, 1],
            dtype=pre_ids.dtype),
        y=step_idx,
        axis=0)
    logits = wrap_decoder((pre_ids, pre_pos, None, pre_src_attn_bias),
                          trg_vocab_size, max_in_len, n_layer, n_head,
                          d_key, d_value, d_model, d_inner_hid,
                          prepostprocess_dropout, attention_dropout,
                          relu_dropout, preprocess_cmd, postprocess_cmd,
                          weight_sharing,
                          enc_output=enc_output,
                          caches=pre_caches,
                          bos_idx=bos_idx)
    # intra-beam topK
    topk_scores, topk_indices = layers.topk(
        input=layers.softmax(logits), k=beam_size)
    accu_scores = layers.elementwise_add(
        x=layers.log(topk_scores), y=pre_scores, axis=0)
    # beam_search op uses lod to differentiate branches.
    accu_scores = layers.lod_reset(accu_scores, pre_ids)
    # topK reduction across beams, also contain special handle of
    # end beams and end sentences(batch reduction)
    selected_ids, selected_scores, gather_idx = layers.beam_search(
        pre_ids=pre_ids,
        pre_scores=pre_scores,
        ids=topk_indices,
        scores=accu_scores,
        beam_size=beam_size,
        end_id=eos_idx,
        return_parent_idx=True)
    step_idx = layers.increment(x=step_idx, value=1.0, in_place=False)
    layers.array_write(selected_ids, i=step_idx, array=ids)
    layers.array_write(selected_scores, i=step_idx, array=scores)
    return (step_idx, selected_ids, selected_scores, gather_idx,
            pre_caches, pre_src_attn_bias)

def __split_heads_qkv_word(queries, keys, values, n_head, d_key, d_value):
    """
    Reshape input tensors at the last dimension to split multi-heads
    and then transpose.
    :param queries: (batch_size, tgt_len, d_model)
    :param keys: (batch_size, n_block, n_token, d_model)
    :param values: (batch_size, n_block, n_token, d_model)
    :return:
    """
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    reshaped_q = layers.reshape(x=queries, shape=[0, 0, n_head, d_key], inplace=True)
    # [batch_size, n_head, tgt_len, dim_per_head]
    q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
    # For encoder-decoder attention in inference, insert the ops and vars
    # into global block to use as cache among beam search.
    reshape_layer = wrap_layer_with_block(
        layers.reshape,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None else layers.reshape
    transpose_layer = wrap_layer_with_block(
        layers.transpose,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None else layers.transpose
    reshaped_k = reshape_layer(x=keys, shape=[0, 0, 0, n_head, d_key], inplace=True)
    k = transpose_layer(x=reshaped_k, perm=[0, 1, 3, 2, 4])
    reshaped_v = reshape_layer(x=values, shape=[0, 0, 0, n_head, d_value], inplace=True)
    v = transpose_layer(x=reshaped_v, perm=[0, 1, 3, 2, 4])

    if cache is not None:  # only for faster inference
        cache_k, cache_v = cache["static_k_word"], cache["static_v_word"]
        # To init the static_k and static_v in cache.
        static_cache_init = wrap_layer_with_block(
            layers.assign,
            fluid.default_main_program().current_block().parent_idx)
        static_cache_init(k, cache_k)
        static_cache_init(v, cache_v)
        # gather cell states corresponding to selected parent
        select_k = layers.gather(cache_k, index=gather_idx)
        select_v = layers.gather(cache_v, index=gather_idx)
        layers.assign(select_k, cache_k)
        layers.assign(select_v, cache_v)
        return q, select_k, select_v

    return q, k, v

def gather_cache(self, kv_caches, select_id):
    """gather cache"""
    for index in xrange(len(kv_caches)):
        kv_cache = kv_caches[index]
        select_k = layers.gather(kv_cache['k'], [select_id])
        select_v = layers.gather(kv_cache['v'], [select_id])
        layers.assign(select_k, kv_caches[index]['k'])
        layers.assign(select_v, kv_caches[index]['v'])

def fast_nms(self, boxes, scores, masks, max_num_detections=100):
    iou_threshold = self.nms_thresh
    top_k = self.top_k

    # Sort the boxes of each class by score in descending order.
    scores, idx = P.argsort(scores, axis=1, descending=True)
    idx = idx[:, :top_k]
    scores = scores[:, :top_k]

    num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]
    idx = P.reshape(idx, (-1, ))
    boxes = P.gather(boxes, idx)
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))
    masks = P.gather(masks, idx)
    masks = P.reshape(masks, (num_classes, num_dets, -1))

    # Compute a c x n x n IoU matrix; each n x n slice holds the pairwise IoU
    # among the n candidate boxes of that class.
    iou = jaccard(boxes, boxes)
    # Since IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), post-process the IoU matrix
    # by zeroing the diagonal and the lower triangle of every channel.
    rows = P.range(0, num_dets, 1, 'int32')
    cols = P.range(0, num_dets, 1, 'int32')
    rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
    cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
    tri_mask = P.cast(rows > cols, 'float32')
    tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)), [num_classes, 1, 1])
    iou = tri_mask * iou
    iou_max = P.reduce_max(iou, dim=1)

    # Now just filter out the ones higher than the threshold
    keep = P.where(iou_max <= iou_threshold)

    # Assign each kept detection to its corresponding class
    classes = P.range(0, num_classes, 1, 'int32')
    classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(classes, keep)
    boxes = P.gather_nd(boxes, keep)
    masks = P.gather_nd(masks, keep)
    scores = P.gather_nd(scores, keep)

    # Only keep the top cfg.max_num_detections highest scores across all classes
    scores, idx = P.argsort(scores, axis=0, descending=True)
    idx = idx[:max_num_detections]
    scores = scores[:max_num_detections]
    classes = P.gather(classes, idx)
    boxes = P.gather(boxes, idx)
    masks = P.gather(masks, idx)
    return boxes, masks, classes, scores

def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k):
    '''
    :param boxes: [?, 4]
    :param scores: [80, ?]
    '''
    # Sort the boxes of each class by score in descending order.
    scores, idx = P.argsort(scores, axis=1, descending=True)
    idx = idx[:, :keep_top_k]
    scores = scores[:, :keep_top_k]

    num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]
    idx = P.reshape(idx, (-1, ))
    boxes = P.gather(boxes, idx)
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))

    # Compute a c x n x n IoU matrix; each n x n slice holds the pairwise IoU
    # among the n candidate boxes of that class.
    iou = _iou(boxes, boxes)
    # Since IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), post-process the IoU matrix
    # by zeroing the diagonal and the lower triangle of every channel.
    rows = P.range(0, num_dets, 1, 'int32')
    cols = P.range(0, num_dets, 1, 'int32')
    rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
    cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
    tri_mask = P.cast(rows > cols, 'float32')
    tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)), [num_classes, 1, 1])
    iou = tri_mask * iou
    iou_max = P.reduce_max(iou, dim=1)

    # Within each class, a box is dropped if its highest IoU with any higher-scoring
    # box exceeds nms_thresh; the box at index 0 is always kept.
    keep = P.where(iou_max <= nms_thresh)

    # Assign each kept detection to its corresponding class
    classes = P.range(0, num_classes, 1, 'int32')
    classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(classes, keep)
    boxes = P.gather_nd(boxes, keep)
    scores = P.gather_nd(scores, keep)

    # Only keep the top nms_top_k highest scores across all classes
    scores, idx = P.argsort(scores, axis=0, descending=True)
    idx = idx[:nms_top_k]
    scores = scores[:nms_top_k]
    classes = P.gather(classes, idx)
    boxes = P.gather(boxes, idx)
    return boxes, scores, classes

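# Hypothetical usage sketch for the standalone fast_nms above (not from the
# original repo): the input names, shapes, and thresholds are illustrative
# assumptions. `decoded_boxes` holds [num_priors, 4] boxes and `cls_scores`
# holds [num_classes, num_priors] per-class scores; `P` is assumed to be
# `paddle.fluid.layers` as in the snippet above.
import paddle.fluid as fluid

decoded_boxes = fluid.data(name="decoded_boxes", shape=[None, 4], dtype="float32")
cls_scores = fluid.data(name="cls_scores", shape=[80, None], dtype="float32")
out_boxes, out_scores, out_classes = fast_nms(
    decoded_boxes, cls_scores,
    conf_thresh=0.05, nms_thresh=0.5, keep_top_k=100, nms_top_k=200)
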
def matrix_nms(bboxes, scores, score_threshold, post_threshold, nms_top_k,
               keep_top_k, use_gaussian=False, gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores,
                              kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)
    return pred

def label_embed_input(self, feature):
    label = F.data(name="label", shape=[None, self.out_size], dtype="int64")
    label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
    label = L.gather(label, label_idx, overwrite=False)
    label = L.cast(label, dtype="float32")
    label_feat = self.embed_input(label, "label_feat")

    feature_label = L.gather(feature, label_idx, overwrite=False)
    feature_label = feature_label + label_feat
    feature = L.scatter(feature, label_idx, feature_label, overwrite=True)
    return feature

def norm_gcn(gw, feature, hidden_size, activation, name, norm=None):
    """Implementation of graph convolutional neural networks (GCN),
    using a configurable normalization method.

    Args:
        gw: Graph wrapper object.
        feature: A tensor with shape (num_nodes, feature_size).
        hidden_size: The hidden size for norm gcn.
        activation: The activation for the output.
        name: Norm gcn layer name.
        norm: If norm is not None, the feature will be normalized. Norm must
            be a tensor with shape (num_nodes,) and dtype float32.

    Return:
        A tensor with shape (num_nodes, hidden_size)
    """
    size = feature.shape[-1]
    feature = L.fc(feature,
                   size=hidden_size,
                   bias_attr=False,
                   param_attr=fluid.ParamAttr(name=name))

    if norm is not None:
        src, dst = gw.edges
        norm_src = L.gather(norm, src, overwrite=False)
        norm_dst = L.gather(norm, dst, overwrite=False)
        norm = norm_src * norm_dst

        def send_src_copy(src_feat, dst_feat, edge_feat):
            return src_feat["h"] * norm
    else:
        def send_src_copy(src_feat, dst_feat, edge_feat):
            return src_feat["h"]

    msg = gw.send(send_src_copy, nfeat_list=[("h", feature)])
    output = gw.recv(msg, "sum")

    bias = L.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        is_bias=True,
        name=name + '_bias')
    output = L.elementwise_add(output, bias, act=activation)
    return output

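# Hypothetical usage sketch for norm_gcn above (illustrative only, not from the
# original repo): build a minimal PGL GraphWrapper, feed the per-node degree in
# as data, and use degree^-0.5 as the symmetric normalizer. The feature name
# "feat", the degree input name, the layer name, and the import path are all
# assumptions.
import paddle.fluid as fluid
import paddle.fluid.layers as L
from pgl.graph_wrapper import GraphWrapper

gw = GraphWrapper(name="graph", node_feat=[("feat", [None, 16], "float32")])
degree = fluid.data(name="node_degree", shape=[None], dtype="float32")
norm = L.pow(degree + 1e-12, factor=-0.5)
output = norm_gcn(gw, gw.node_feat["feat"], hidden_size=64,
                  activation="relu", name="norm_gcn_0", norm=norm)
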
def is_finished(self, step_idx, source_length, alive_log_probs,
                finished_scores, finished_in_finished):
    """is_finished"""
    base_1 = layers.cast(source_length, 'float32') + 55.0
    base_1 /= 6.0
    max_length_penalty = layers.pow(base_1, self.alpha)

    flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
    lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
    lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty

    lowest_score_of_finished_in_finish = layers.reduce_min(
        finished_scores * finished_in_finished, dim=1)
    finished_in_finished = layers.cast(finished_in_finished, 'bool')
    lowest_score_of_finished_in_finish += \
        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)

    bound_is_met = layers.reduce_all(
        layers.greater_than(lowest_score_of_finished_in_finish, lower_bound_alive_scores))

    decode_length = source_length + 50
    length_cond = layers.less_than(x=step_idx, y=decode_length)
    return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)

def take_final_feature(self, feature, index, name):
    """take final feature"""
    feat = L.gather(feature, index, overwrite=False)

    ernie_config = self.config.ernie_config
    ernie = ErnieGraphModel(
        src_ids=feat,
        config=ernie_config,
        slot_seqlen=self.config.max_seqlen,
        name="student_")
    feat = ernie.get_pooled_output()

    fc_lr = self.config.lr / 0.001
    feat = L.fc(
        feat,
        self.config.hidden_size,
        act="relu",
        param_attr=F.ParamAttr(name=name + "_l", learning_rate=fc_lr),
    )
    feat = L.l2_normalize(feat, axis=1)

    if self.config.final_fc:
        feat = L.fc(feat,
                    self.config.hidden_size,
                    param_attr=F.ParamAttr(name=name + '_w'),
                    bias_attr=F.ParamAttr(name=name + '_b'))
    if self.config.final_l2_norm:
        feat = L.l2_normalize(feat, axis=1)
    return feat

def ce_conf_loss(self, pred_allboxes_conf, labels_pos_mask, labels_neg_mask,
                 class_vectors, labels_pos_cid2, gt_area):
    labels_pos_cid2 = P.reshape(labels_pos_cid2, (-1, ))  # [batch_size*num_priors]
    pred_allboxes_conf_r = P.reshape(
        pred_allboxes_conf,
        (-1, P.shape(pred_allboxes_conf)[2]))  # [batch_size*num_priors, num_classes]
    label_prob = P.gather(
        class_vectors, labels_pos_cid2)  # one-hot mask, (batch_size*num_priors, num_classes)

    pred_prob = P.softmax(pred_allboxes_conf_r)
    pred_prob = P.cast(pred_prob, 'float32')
    prob_loss = label_prob * (0 - P.log(pred_prob + 1e-9))  # add a tiny constant to avoid NaN
    prob_loss = P.reduce_sum(prob_loss, dim=1)

    # keep only the losses of the positive and negative examples
    labels_pos_mask2 = P.reshape(labels_pos_mask, (-1, ))  # [batch_size*num_priors]
    labels_neg_mask2 = P.reshape(labels_neg_mask, (-1, ))  # [batch_size*num_priors]
    conf_loss_scale = 2.0 - gt_area  # the smaller the gt area, the larger the weight
    conf_loss_scale = P.reshape(conf_loss_scale, (-1, ))  # [batch_size*num_priors]
    prob_pos_loss = prob_loss * labels_pos_mask2 * conf_loss_scale
    prob_neg_loss = prob_loss * labels_neg_mask2
    ce_loss = prob_pos_loss + prob_neg_loss
    ce_loss = P.reduce_sum(ce_loss)

    return ce_loss

def grow_top_k(step_idx, alive_seq, alive_log_prob, parant_idx):
    pre_ids = alive_seq

    dec_step_emb = layers.embedding(
        input=pre_ids,
        size=[self.tar_vocab_size, self.hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='target_embedding',
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale)))

    dec_att_out, new_hidden_array, new_cell_array = decoder_step(
        dec_step_emb, pre_feed, pre_hidden_array, pre_cell_array, enc_memory)

    projection = layers.matmul(dec_att_out, softmax_weight)

    logits = layers.softmax(projection)
    current_log = layers.elementwise_add(x=layers.log(logits),
                                         y=alive_log_prob,
                                         axis=0)
    base_1 = layers.cast(step_idx, 'float32') + 6.0
    base_1 /= 6.0
    length_penalty = layers.pow(base_1, alpha)

    len_pen = layers.pow(
        ((5. + layers.cast(step_idx + 1, 'float32')) / 6.), alpha)

    current_log = layers.reshape(current_log, shape=[1, -1])
    current_log = current_log / length_penalty
    topk_scores, topk_indices = layers.topk(input=current_log, k=beam_size)
    topk_scores = layers.reshape(topk_scores, shape=[-1])

    topk_log_probs = topk_scores * length_penalty

    generate_id = layers.reshape(topk_indices, shape=[-1]) % self.tar_vocab_size
    selected_beam = layers.reshape(topk_indices, shape=[-1]) // self.tar_vocab_size

    topk_finished = layers.equal(generate_id, eos_ids)
    topk_finished = layers.cast(topk_finished, 'float32')

    generate_id = layers.reshape(generate_id, shape=[-1, 1])

    pre_tokens_list = layers.gather(tokens, selected_beam)
    full_tokens_list = layers.concat([pre_tokens_list, generate_id], axis=1)

    return full_tokens_list, topk_log_probs, topk_scores, topk_finished, selected_beam, generate_id, \
        dec_att_out, new_hidden_array, new_cell_array

def forward(self, is_test=False):
    """
    Build the network.
    """
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64")
    masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64")

    node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
    masked_node_repr = layers.gather(node_repr, masked_node_indice)
    logits = layers.fc(masked_node_repr,
                       size=len(CompoundConstants.atom_num_list),
                       name="masked_node_logits")

    loss, pred = layers.softmax_with_cross_entropy(
        logits, masked_node_label, return_softmax=True)
    loss = layers.reduce_mean(loss)
    acc = layers.accuracy(pred, masked_node_label)

    self.graph_wrapper = graph_wrapper
    self.loss = loss

def graph_gather(gw, feature, index):
    """Implementation of graph gather.

    Gather the corresponding index for each graph.

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)
        feature: A tensor with shape (num_nodes, ).
        index (int32): A K-rank tensor whose first dim denotes the graph.
            Shape (num_graph, ) or (num_graph, k1, k2, k3, ..., kn).
            WARNING: We don't support negative index.

    Return:
        A tensor with shape (num_graph, k1, k2, k3, ..., kn, hidden_size)
    """
    shape = L.shape(index)
    output_dim = int(feature.shape[-1])
    index = index + gw.graph_lod[:-1]
    index = L.reshape(index, [-1])
    feature = L.gather(feature, index, overwrite=False)
    new_shape = []
    for i in range(shape.shape[0]):
        new_shape.append(shape[i])
    new_shape.append(output_dim)
    feature = L.reshape(feature, new_shape)
    return feature

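# Hypothetical usage sketch for graph_gather above (illustrative only): gather
# one node per graph from a batched PGL GraphWrapper. The wrapper, the 64-dim
# feature name "h", the index input name, and the import path are assumptions,
# not taken from the original repo.
import paddle.fluid as fluid
from pgl.graph_wrapper import GraphWrapper

gw = GraphWrapper(name="graph", node_feat=[("h", [None, 64], "float32")])
pick_index = fluid.data(name="pick_index", shape=[None], dtype="int32")
graph_feat = graph_gather(gw, gw.node_feat["h"], pick_index)  # (num_graph, 64)
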
def create_cam_op(self, predict, class_dim, heatmaps):
    """Compute the class activation map (CAM) with tensors.

    Args:
        predict: model output tensor activated by softmax
        class_dim: dim of multi-class vector
        heatmaps: feature maps before global pooling

    Returns:
        heatmaps: class activation map
    """
    if self.main_arch in DenseNetModels:
        weights_shape = 1024
        name = "fc_weights"
    elif self.main_arch == "xception":
        weights_shape = 2048
        name = "fc_weights"
    else:
        raise ValueError(
            "Calc CAM of model arch {} is not supported.".format(
                self.main_arch))

    fc_weights = FL.create_parameter(shape=[weights_shape, class_dim],
                                     dtype='float32',
                                     name=name)  # 1024, 5

    pred_idx = FL.argmax(predict, 1)  # bs, 1
    fc_weights = FL.transpose(fc_weights, perm=[1, 0])  # 5, 1024
    fc_weights = FL.gather(fc_weights, index=pred_idx)  # bs, 1024

    heatmaps = heatmaps * fc_weights  # bs, 1024, 16, 16
    heatmaps = FL.reduce_sum(heatmaps, dim=1, keep_dim=False)
    return heatmaps

def _get_pooled_output(self, enc_out, idx=None, name="pooled"):
    """Get pooled output of the last output embedding in Transformer.

    Args:
        enc_out: the output embeddings of Transformer, shape is
            [batch_size, max_seq_len, hidden_size]
        idx (optional): the selected indices in pooling operator, shape is
            [batch_size, 1] or [batch_size, 2].
        name: a string, the name of the pooling layer.

    Returns:
        pooled_out: the pooled output embedding, shape is
            [batch_size, hidden_size].
    """
    if idx is None:
        feat = enc_out[:, 0]
    elif len(idx.shape) == 2 and idx.shape[1] == 1:
        enc_out = layers.squeeze(enc_out, [1])
        feat = layers.gather(input=enc_out, index=idx)
    elif len(idx.shape) == 2 and idx.shape[1] == 2:
        feat = layers.gather_nd(input=enc_out, index=idx)
    else:
        raise ValueError(f"Invalid indices shape {idx.shape} is used")

    pooled_out = layers.fc(
        input=feat,
        size=self.hidden_size,
        act="tanh",
        param_attr=fluid.ParamAttr(name=f"{name}_fc.w_0",
                                   initializer=self.param_initializer),
        bias_attr=f"{name}_fc.b_0")
    return pooled_out