def __call__(self, x, pad):
    pad = paddle.reshape(pad, shape=[2, -1])
    pad = paddle.transpose(pad, perm=[1, 0])
    pad = paddle.reverse(pad, axis=[0])
    pad = paddle.flatten(pad)
    pad = paddle.cast(pad, dtype="int32")
    x = paddle.unsqueeze(x, axis=[0, 1])
    out = paddle.nn.functional.pad(x=x, pad=pad, **self.layer_attrs)
    out = paddle.squeeze(out, axis=[0, 1])
    return out
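# A minimal sketch (not from the source) of what the pad reordering above
# accomplishes: ONNX-style pads (all begins, then all ends, first dim first)
# are rearranged into the last-dim-first [left, right, top, bottom] order
# that paddle.nn.functional.pad expects.
import paddle
import paddle.nn.functional as F

x = paddle.ones([2, 3])
onnx_pad = paddle.to_tensor([1, 0, 1, 2])        # begins [1, 0], ends [1, 2]
pad = paddle.reshape(onnx_pad, shape=[2, -1])    # [[1, 0], [1, 2]]
pad = paddle.transpose(pad, perm=[1, 0])         # [[1, 1], [0, 2]] per-dim pairs
pad = paddle.reverse(pad, axis=[0])              # [[0, 2], [1, 1]] last dim first
pad = paddle.flatten(pad)                        # [0, 2, 1, 1]
out = F.pad(paddle.unsqueeze(x, axis=[0, 1]), pad=pad.numpy().tolist())
print(paddle.squeeze(out, axis=[0, 1]).shape)    # [4, 5]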
def test_out(self):
    paddle.disable_static()
    input1 = np.random.random([5, 10]).astype("int32")
    out1 = np.expand_dims(input1, axis=1)
    out1 = np.expand_dims(out1, axis=2)
    input = paddle.to_tensor(input1)
    output = paddle.unsqueeze(input, axis=paddle.to_tensor([1, 2]))
    out_np = output.numpy()
    self.assertTrue(np.array_equal(out1, out_np))
    self.assertEqual(out1.shape, out_np.shape)
def get_paddings_indicator(actual_num, max_num, axis=0):
    """Create a boolean mask from the actual number of valid entries in a padded tensor."""
    actual_num = paddle.unsqueeze(actual_num, axis + 1)
    max_num_shape = [1] * len(actual_num.shape)
    max_num_shape[axis + 1] = -1
    max_num = paddle.arange(max_num, dtype="int64").reshape(max_num_shape)
    paddings_indicator = actual_num.astype("int64") > max_num
    return paddings_indicator
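# Hypothetical usage of get_paddings_indicator: mask out the padded tail
# of each row given the true count of valid entries per sample.
import paddle

actual = paddle.to_tensor([2, 4])
mask = get_paddings_indicator(actual, max_num=5)
# mask: [[True, True, False, False, False],
#        [True, True, True,  True,  False]]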
def paste_mask(self, masks, boxes, im_h, im_w):
    """
    Paste the mask prediction to the original image.
    """
    x0, y0, x1, y1 = paddle.split(boxes, 4, axis=1)
    masks = paddle.unsqueeze(masks, [0, 1])
    img_y = paddle.arange(0, im_h, dtype='float32') + 0.5
    img_x = paddle.arange(0, im_w, dtype='float32') + 0.5
    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
    img_x = (img_x - x0) / (x1 - x0) * 2 - 1
    img_x = paddle.unsqueeze(img_x, [1])
    img_y = paddle.unsqueeze(img_y, [2])
    N = boxes.shape[0]

    gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]])
    gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]])
    grid = paddle.stack([gx, gy], axis=3)
    img_masks = F.grid_sample(masks, grid, align_corners=False)
    return img_masks[:, 0]
def _get_output_single(self, input, idx):
    ins_kernel_feat = input
    # CoordConv
    x_range = paddle.linspace(
        -1, 1, paddle.shape(ins_kernel_feat)[-1], dtype='float32')
    y_range = paddle.linspace(
        -1, 1, paddle.shape(ins_kernel_feat)[-2], dtype='float32')
    y, x = paddle.meshgrid([y_range, x_range])
    x = paddle.unsqueeze(x, [0, 1])
    y = paddle.unsqueeze(y, [0, 1])
    y = paddle.expand(
        y, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
    x = paddle.expand(
        x, shape=[paddle.shape(ins_kernel_feat)[0], 1, -1, -1])
    coord_feat = paddle.concat([x, y], axis=1)
    ins_kernel_feat = paddle.concat([ins_kernel_feat, coord_feat], axis=1)

    # kernel branch
    kernel_feat = ins_kernel_feat
    seg_num_grid = self.seg_num_grids[idx]
    kernel_feat = F.interpolate(
        kernel_feat,
        size=[seg_num_grid, seg_num_grid],
        mode='bilinear',
        align_corners=False,
        align_mode=0)
    cate_feat = kernel_feat[:, :-2, :, :]

    for kernel_layer in self.kernel_pred_convs:
        kernel_feat = F.relu(kernel_layer(kernel_feat))
    kernel_pred = self.solo_kernel(kernel_feat)
    # cate branch
    for cate_layer in self.cate_pred_convs:
        cate_feat = F.relu(cate_layer(cate_feat))
    cate_pred = self.solo_cate(cate_feat)

    if not self.training:
        cate_pred = self._points_nms(F.sigmoid(cate_pred), kernel_size=2)
        cate_pred = paddle.transpose(cate_pred, [0, 2, 3, 1])
    return cate_pred, kernel_pred
def forward(self, input):
    pool = self.pool2d_gap(input)
    pool = paddle.squeeze(pool, axis=[2, 3])
    squeeze = self.squeeze(pool)
    squeeze = F.relu(squeeze)
    excitation = self.excitation(squeeze)
    excitation = F.sigmoid(excitation)
    excitation = paddle.unsqueeze(excitation, axis=[2, 3])
    out = input * excitation
    return out
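# Sketch (assumed shapes) of the broadcast this block relies on: the
# excitation gates come out as [N, C] and the unsqueeze restores
# [N, C, 1, 1] so they broadcast against the [N, C, H, W] input.
import paddle

feat = paddle.rand([2, 8, 16, 16])
gate = paddle.rand([2, 8])                    # per-channel weights
gate = paddle.unsqueeze(gate, axis=[2, 3])    # [2, 8, 1, 1]
print((feat * gate).shape)                    # [2, 8, 16, 16]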
def forward(self, inputs, targets=None, batch_max_length=25):
    batch_size = inputs.shape[0]
    num_steps = batch_max_length

    hidden = (paddle.zeros((batch_size, self.hidden_size)),
              paddle.zeros((batch_size, self.hidden_size)))
    output_hiddens = []

    if targets is not None:
        for i in range(num_steps):
            # one-hot vector for the i-th char
            char_onehots = self._char_to_onehot(
                targets[:, i], onehot_dim=self.num_classes)
            hidden, alpha = self.attention_cell(hidden, inputs,
                                                char_onehots)
            hidden = (hidden[1][0], hidden[1][1])
            output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1))
        output = paddle.concat(output_hiddens, axis=1)
        probs = self.generator(output)
    else:
        targets = paddle.zeros(shape=[batch_size], dtype="int32")
        probs = None

        for i in range(num_steps):
            char_onehots = self._char_to_onehot(
                targets, onehot_dim=self.num_classes)
            hidden, alpha = self.attention_cell(hidden, inputs,
                                                char_onehots)
            probs_step = self.generator(hidden[0])
            hidden = (hidden[1][0], hidden[1][1])
            if probs is None:
                probs = paddle.unsqueeze(probs_step, axis=1)
            else:
                probs = paddle.concat(
                    [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1)
            next_input = probs_step.argmax(axis=1)
            targets = next_input
    return probs
def forward(self, inputs):
    out = self.pool(inputs)
    out = paddle.squeeze(out, axis=[2, 3])
    out = self.squeeze(out)
    out = F.relu(out)
    out = self.extract(out)
    out = F.sigmoid(out)
    out = paddle.unsqueeze(out, axis=[2, 3])
    scale = out * inputs
    return scale
def construct_pred_edge(self, fe_index, s):
    """
    Construct the predicted edge set and corresponding edge weights.

    fe_index: full_edge_index, [2, all_edges_batchwise]
    s: predicted edge value, [all_edges_batchwise, 1]
    """
    s = s[:, 0]
    fe_index = paddle.transpose(fe_index, perm=[1, 0])
    sender = paddle.unsqueeze(fe_index[0][s > 0], 0)
    receiver = paddle.unsqueeze(fe_index[1][s > 0], 0)
    pred_index = paddle.concat([sender, receiver], 0)
    pred_weight = s[s > 0]
    pred_index = paddle.transpose(pred_index, perm=[1, 0])

    return pred_index, pred_weight
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None):
    if attention_mask is None:
        attention_mask = paddle.unsqueeze(
            (input_ids == self.pad_token_id
             ).astype(self.pooler.dense.weight.dtype) * -1e9,
            axis=[1, 2])
    embedding_output = self.embeddings(
        input_ids=input_ids,
        position_ids=position_ids,
        token_type_ids=token_type_ids)
    encoder_outputs = self.encoder(embedding_output, attention_mask)
    sequence_output = encoder_outputs
    pooled_output = self.pooler(sequence_output)
    return sequence_output, pooled_output
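# Hedged illustration of the additive attention mask built above: pad
# positions receive -1e9 so softmax assigns them near-zero weight.
# pad_token_id = 0 is an assumption for this sketch.
import paddle

pad_token_id = 0
ids = paddle.to_tensor([[5, 7, 0, 0]])
mask = paddle.unsqueeze(
    (ids == pad_token_id).astype("float32") * -1e9, axis=[1, 2])
print(mask.shape)    # [1, 1, 1, 4]; broadcasts over heads and query length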
def forward(self, input_ids, attention_mask=None):
    if attention_mask is None:
        attention_mask = paddle.unsqueeze(
            (input_ids == self.pad_token_id).astype(
                self.encoder.layers[0].norm1.weight.dtype) * -1e9,
            axis=[1, 2])
    embedding_output = self.embeddings(input_ids=input_ids)
    encoder_outputs = self.encoder(embedding_output, attention_mask)
    return encoder_outputs
def predict_pnet(infer_data):
    # Prepare the image to predict on
    infer_data = paddle.to_tensor(infer_data, dtype='float32')
    infer_data = paddle.unsqueeze(infer_data, axis=0)
    # Run prediction
    cls_prob, bbox_pred, _ = pnet(infer_data)
    cls_prob = paddle.squeeze(cls_prob)
    cls_prob = softmax_p(cls_prob)
    bbox_pred = paddle.squeeze(bbox_pred)
    return cls_prob.numpy(), bbox_pred.numpy()
def forward(self, inputs):
    pool = self.pool2d_gap(inputs)
    pool = paddle.squeeze(pool, axis=[2, 3])
    squeeze = self.squeeze(pool)
    squeeze = F.relu(squeeze)
    excitation = self.excitation(squeeze)
    excitation = paddle.clip(x=excitation, min=0, max=1)
    excitation = paddle.unsqueeze(excitation, axis=[2, 3])
    out = paddle.multiply(inputs, excitation)
    return out
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the bbox from the output of NMS to get
    the final prediction.

    Args:
        bboxes (Tensor): The output of __call__ with shape [N, 6].

    Returns:
        bbox_pred (Tensor): The prediction with shape [N, 6], including
            labels, scores and bboxes. The bboxes are scaled to the
            original image size.
    """
    if bboxes.shape[0] == 0:
        return paddle.to_tensor([[0, 0.0, 0.0, 0.0, 0.0, 0.0]])

    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    origin_shape_list = []
    scale_factor_list = []
    # scale_factor: scale_y, scale_x
    for i in range(bbox_num.shape[0]):
        expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                     [bbox_num[i], 2])
        scale_y, scale_x = scale_factor[i]
        scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        expand_scale = paddle.expand(scale, [bbox_num[i], 4])
        origin_shape_list.append(expand_shape)
        scale_factor_list.append(expand_scale)

    self.origin_shape_list = paddle.concat(origin_shape_list)
    scale_factor_list = paddle.concat(scale_factor_list)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]
    pred_bbox = bboxes[:, 2:]
    # rescale bbox to original image
    scaled_bbox = pred_bbox / scale_factor_list
    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)
    # clip bbox to [0, original_size]
    x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros)
    pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
    return pred_result
def predict_test_util(place, mode):
    place = paddle.set_device(place)
    paddle.seed(123)
    np.random.seed(123)

    class Net(paddle.nn.Layer):
        def __init__(self):
            super(Net, self).__init__()
            self.rnn = getattr(paddle.nn, mode)(
                16, 32, 2, direction="bidirectional", dropout=0.1)

        def forward(self, input):
            return self.rnn(input)

    x = paddle.randn((4, 10, 16))
    x.stop_gradient = False
    seq_len = paddle.to_tensor(np.array([10, 6, 8, 5]))
    mask = sequence_mask(seq_len, maxlen=10, dtype=x.dtype)
    mask = paddle.unsqueeze(mask, [2])
    rnn = Net()
    y, _ = rnn(x)
    y = y * mask
    loss = paddle.mean(y)
    loss.backward()
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.1, parameters=rnn.parameters())
    optimizer.step()
    rnn.eval()
    y, _ = rnn(x)
    # `jit.to_static` would include a train_program; eval mode might
    # currently cause errors, such as the dropout grad op getting
    # `is_test == True`.
    rnn.train()
    rnn = paddle.jit.to_static(
        rnn,
        [paddle.static.InputSpec(shape=[None, None, 16], dtype=x.dtype)])
    paddle.jit.save(rnn, "./inference/%s_infer" % mode)

    paddle.enable_static()

    new_scope = paddle.static.Scope()
    with paddle.static.scope_guard(new_scope):
        exe = paddle.static.Executor(place)
        [inference_program, feed_target_names,
         fetch_targets] = paddle.static.load_inference_model(
             "./inference/%s_infer" % mode, exe)
        results = exe.run(inference_program,
                          feed={feed_target_names[0]: x.numpy()},
                          fetch_list=fetch_targets)
        np.testing.assert_equal(
            y.numpy(), results[0])  # eval results equal predict results
    paddle.disable_static()
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    bbox_pred = bbox_head_out[0]
    cls_prob = bbox_head_out[1]
    roi = rois[0]
    rois_num = rois[1]
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
    scale_list = []
    origin_shape_list = []
    for idx, roi_per_im in enumerate(roi):
        rois_num_per_im = rois_num[idx]
        expand_im_shape = paddle.expand(im_shape[idx, :],
                                        [rois_num_per_im, 2])
        origin_shape_list.append(expand_im_shape)

    origin_shape = paddle.concat(origin_shape_list)

    # bbox_pred.shape: [N, C*4]
    # C=num_classes in faster/mask rcnn(bbox_head), C=1 in cascade
    # rcnn(cascade_head)
    bbox = paddle.concat(roi)
    if bbox.shape[0] == 0:
        bbox = paddle.zeros([0, bbox_pred.shape[1]], dtype='float32')
    else:
        bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
    scores = cls_prob[:, :-1]

    # bbox.shape: [N, C, 4]
    # bbox.shape[1] must be equal to scores.shape[1]
    bbox_num_class = bbox.shape[1]
    if bbox_num_class == 1:
        bbox = paddle.tile(bbox, [1, self.num_classes, 1])

    origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
    origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(origin_h)
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
    bboxes = (bbox, rois_num)
    return bboxes, scores
def forward(self, x, res_dict=None):
    residual = x
    x = self.avg_pool(x)
    x = paddle.squeeze(x, axis=[2, 3])
    x = self.fc_squeeze(x)
    x = self.relu(x)
    x = self.fc_excitation(x)
    x = self.sigmoid(x)
    x = paddle.unsqueeze(x, axis=[2, 3])
    x = residual * x
    return x
def decode(self, inputs, caches):
    tgt_ids = inputs['tgt_ids']
    tgt_pos = inputs['tgt_pos']
    tgt_generation_mask = inputs['tgt_generation_mask']
    predictions = tgt_ids

    # TODO
    step = 0
    while step < self.max_dec_len:
        # [-1, 1]
        append_mask = paddle.cast(
            tgt_ids != self.eos_id, dtype=tgt_generation_mask.dtype)
        tgt_generation_mask = paddle.concat(
            [tgt_generation_mask, paddle.unsqueeze(append_mask, 1)],
            axis=-1)
        tgt_sent = paddle.ones(
            [tgt_generation_mask.shape[0], 1], dtype=tgt_ids.dtype)

        # [-1, 1, hidden_size]
        out, caches = self.plato2_encoder(caches, tgt_ids, tgt_sent,
                                          tgt_pos, tgt_generation_mask)
        out = paddle.squeeze(out, axis=1)

        # [-1, hidden_size]
        trans = self.logits_fc_layer(out)
        trans = self.gelu_layer(trans)
        trans = self.logits_layer_norm(trans)

        # [-1, vocab_size]
        logits = paddle.matmul(
            trans,
            self.plato2_encoder.word_embedding_layer.weight,
            transpose_y=True) + self.logits_bias
        logits[:, self.unk_id] = -1e9
        logits[:, self.bos_id] = -1e9
        logits[:, self.mask_id] = -1e9
        if step < self.min_dec_len:
            logits[:, self.eos_id] = -1e9
        logits = logits * append_mask + (1 - append_mask) * self.after_eos
        probs = self.softmax(logits)

        # [-1, topk]
        topk_probs, _ = paddle.topk(probs, k=self.topk)
        mask = paddle.cast(probs >= topk_probs[:, -1:], 'float32')
        sums = paddle.sum(topk_probs, axis=-1, keepdim=True)
        new_probs = probs * mask / sums
        # [-1, 1]
        sampling_ids = paddle.multinomial(new_probs)

        step = step + 1
        tgt_ids = sampling_ids
        tgt_pos = tgt_pos + 1

        predictions = paddle.concat([predictions, tgt_ids], axis=1)
    return predictions
def forward(self, inputs):
    """
    forward
    """
    axis = self.config['axis']
    if self.config['isTensor']:
        axis = paddle.to_tensor(axis)
    if self.config['isTensor13']:
        axis = axis * 1
    x = paddle.unsqueeze(inputs, axis=axis)
    return x
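# Minimal sketch of the Tensor-axis path exercised above:
# paddle.unsqueeze accepts the axis as a Tensor as well as an int or list.
import paddle

x = paddle.rand([3, 4])
print(paddle.unsqueeze(x, axis=1).shape)                       # [3, 1, 4]
print(paddle.unsqueeze(x, axis=paddle.to_tensor([1])).shape)   # [3, 1, 4]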
def global_context_block(self, x):
    x_shape = paddle.shape(x)
    # [N, C, H * W]
    input_x = paddle.reshape(x, shape=[0, self.gc_channels, -1])
    # [N, 1, C, H * W]
    input_x = paddle.unsqueeze(input_x, axis=1)
    # [N, 1, H, W]
    context_mask = self.conv_mask(x)
    # [N, 1, H * W]
    context_mask = paddle.reshape(context_mask, shape=[0, 1, -1])
    context_mask = self.softmax(context_mask)
    # [N, 1, H * W, 1]
    context_mask = paddle.unsqueeze(context_mask, axis=-1)
    # [N, 1, C, 1]
    context = paddle.matmul(input_x, context_mask)
    # [N, C, 1, 1]
    context = paddle.reshape(context, shape=[0, self.gc_channels, 1, 1])
    return context
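# Shape walk-through for the block above with assumed sizes N=2,
# gc_channels=8, H=W=4 (so H*W=16):
#   input_x:      [2, 1, 8, 16]
#   context_mask: [2, 1, 16, 1]  (softmax over the 16 spatial positions)
#   matmul:       [2, 1, 8, 1] -> reshape -> [2, 8, 1, 1]
# i.e. each output channel is a softmax-weighted average over positions.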
def test_out(self):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data1 = fluid.layers.data('data1', shape=[-1, 10], dtype='float64')
        result_squeeze = paddle.unsqueeze(data1, axis=[1])
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        input1 = np.random.random([5, 1, 10]).astype('float64')
        input = np.squeeze(input1, axis=1)
        result, = exe.run(feed={"data1": input},
                          fetch_list=[result_squeeze])
        self.assertTrue(np.allclose(input1, result))
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None):
    if self.task in ['seq-cls', 'token-cls']:
        logits = self.model(input_ids, token_type_ids, position_ids,
                            attention_mask)
        return logits
    elif self.task == 'qa':
        start_logits, end_logits = self.model(input_ids, token_type_ids,
                                              position_ids, attention_mask)
        start_position = paddle.unsqueeze(start_logits, axis=-1)
        end_position = paddle.unsqueeze(end_logits, axis=-1)
        return start_position, end_position
    elif self.task is None:
        sequence_output, pooled_output = self.model(
            input_ids, token_type_ids, position_ids, attention_mask)
        return sequence_output, pooled_output
def compute(self, pred, label, seq_mask=None):
    label = paddle.unsqueeze(label, axis=2)
    ce = F.softmax_with_cross_entropy(
        logits=pred, label=label, soft_label=False)
    ce = paddle.squeeze(ce, axis=[2])
    if seq_mask is not None:
        ce = ce * seq_mask
        word_num = paddle.sum(seq_mask)
        return ce, word_num
    return ce
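# Minimal sketch of the axis bookkeeping above: softmax_with_cross_entropy
# expects the label to carry a trailing axis of size 1, hence the
# unsqueeze before the call and the squeeze after. Shapes are assumptions.
import paddle
import paddle.nn.functional as F

logits = paddle.rand([4, 10, 50])           # [batch, seq_len, vocab]
labels = paddle.randint(0, 50, [4, 10])     # [batch, seq_len]
ce = F.softmax_with_cross_entropy(
    logits=logits, label=paddle.unsqueeze(labels, axis=2))
print(ce.shape)    # [4, 10, 1]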
def forward(self, logit, label):
    """
    Forward computation.

    Args:
        logit (Tensor): Logit tensor, the data type is float32, float64.
            Shape is (N, C), where C is number of classes, and if shape is
            more than 2D, this is (N, C, D1, D2,..., Dk), k >= 1.
        label (Tensor): Label tensor, the data type is int64. Shape is
            (N, C), where each value is 0 or 1, and if shape is more than
            2D, this is (N, C, D1, D2,..., Dk), k >= 1.
    """
    if len(label.shape) != len(logit.shape):
        label = paddle.unsqueeze(label, 1)
    mask = (label != self.ignore_index)
    mask = paddle.cast(mask, 'float32')
    # label.shape should equal to the logit.shape
    if label.shape[1] != logit.shape[1]:
        label = label.squeeze(1)
        label = F.one_hot(label, logit.shape[1])
        label = label.transpose((0, 3, 1, 2))
    if isinstance(self.weight, str):
        pos_index = (label == 1)
        neg_index = (label == 0)
        pos_num = paddle.sum(pos_index.astype('float32'))
        neg_num = paddle.sum(neg_index.astype('float32'))
        sum_num = pos_num + neg_num
        weight_pos = 2 * neg_num / (sum_num + self.EPS)
        weight_neg = 2 * pos_num / (sum_num + self.EPS)
        weight = weight_pos * label + weight_neg * (1 - label)
    else:
        weight = self.weight
    if isinstance(self.pos_weight, str):
        pos_index = (label == 1)
        neg_index = (label == 0)
        pos_num = paddle.sum(pos_index.astype('float32'))
        neg_num = paddle.sum(neg_index.astype('float32'))
        sum_num = pos_num + neg_num
        pos_weight = 2 * neg_num / (sum_num + self.EPS)
    else:
        pos_weight = self.pos_weight
    label = label.astype('float32')
    loss = paddle.nn.functional.binary_cross_entropy_with_logits(
        logit,
        label,
        weight=weight,
        reduction='none',
        pos_weight=pos_weight)
    loss = loss * mask
    loss = paddle.mean(loss) / (paddle.mean(mask) + self.EPS)
    label.stop_gradient = True
    mask.stop_gradient = True

    return loss
def forward(self, query, processed_key, value, attention_weights_cat,
            mask=None):
    """Compute context vector and attention weights.

    Parameters
    -----------
    query : Tensor [shape=(batch_size, d_query)]
        The queries.
    processed_key : Tensor [shape=(batch_size, time_steps_k, d_attention)]
        The keys after linear layer.
    value : Tensor [shape=(batch_size, time_steps_k, d_key)]
        The values.
    attention_weights_cat : Tensor [shape=(batch_size, time_steps_k, 2)]
        Attention weights concat.
    mask : Tensor, optional
        The mask. Shape should be (batch_size, time_steps_q, time_steps_k)
        or broadcastable shape. Defaults to None.

    Returns
    ----------
    attention_context : Tensor [shape=(batch_size, time_steps_q, d_attention)]
        The context vector.
    attention_weights : Tensor [shape=(batch_size, time_steps_q, time_steps_k)]
        The attention weights.
    """
    processed_query = self.query_layer(paddle.unsqueeze(query, axis=[1]))
    processed_attention_weights = self.location_layer(
        self.location_conv(attention_weights_cat))
    alignment = self.value(
        paddle.tanh(processed_attention_weights + processed_key +
                    processed_query))

    if mask is not None:
        alignment = alignment + (1.0 - mask) * -1e9

    attention_weights = F.softmax(alignment, axis=1)
    attention_context = paddle.matmul(
        attention_weights, value, transpose_x=True)

    attention_weights = paddle.squeeze(attention_weights, axis=[-1])
    attention_context = paddle.squeeze(attention_context, axis=[1])

    return attention_context, attention_weights
def update_buffer(self, x_t):
    """Shift the buffer by one step.

    Parameters
    ----------
    x_t : Tensor [shape=(batch_size, in_channels)]
        The step input.
    """
    self._buffer = paddle.concat(
        [self._buffer[:, :, 1:], paddle.unsqueeze(x_t, -1)], -1)
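# Hedged sketch of the shift performed by update_buffer: drop the oldest
# time step, append the new step as a trailing axis of size 1.
import paddle

buf = paddle.zeros([1, 4, 8])    # [batch, in_channels, time]
x_t = paddle.ones([1, 4])        # one new step
buf = paddle.concat([buf[:, :, 1:], paddle.unsqueeze(x_t, -1)], -1)
print(buf.shape)                 # still [1, 4, 8]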
def topk_sampling(self, probs):
    topk_probs, _ = paddle.topk(probs, self.topk)
    ge_cond = paddle.cast(
        paddle.greater_equal(probs,
                             paddle.unsqueeze(topk_probs[:, -1], [1])),
        "float32")
    old_probs = probs
    # zero out everything below the k-th largest probability, then
    # renormalize before sampling
    probs = probs * ge_cond / paddle.sum(
        topk_probs, axis=-1, keepdim=True)
    sampling_ids = layers.sampling_id(probs, dtype="int")
    probs = old_probs
    return probs, sampling_ids
def forward(self, x):
    feature_embedding = []
    for i in range(len(x)):
        embed = self.embedding_dict[i](x[i])
        feature_embedding.append(embed)
    feature_embedding = paddle.concat(feature_embedding, 1)

    tower_click = self.click_tower(feature_embedding)

    tower_conversion = paddle.unsqueeze(
        self.conversion_tower(feature_embedding), 1)

    info = paddle.unsqueeze(self.info_layer(tower_click), 1)

    ait = self.attention_layer(paddle.concat([tower_conversion, info], 1))

    click = paddle.squeeze(self.click_layer(tower_click), 1)
    conversion = paddle.squeeze(self.conversion_layer(ait), 1)
    return click, conversion
def __call__(self, preds, prior_boxes, im_shape, scale_factor,
             var_weight=None):
    boxes, scores = preds['boxes'], preds['scores']
    outputs = []
    for box, score, prior_box in zip(boxes, scores, prior_boxes):
        pb_w = prior_box[:, 2] - prior_box[:, 0] + self.norm_delta
        pb_h = prior_box[:, 3] - prior_box[:, 1] + self.norm_delta
        pb_x = prior_box[:, 0] + pb_w * 0.5
        pb_y = prior_box[:, 1] + pb_h * 0.5
        out_x = pb_x + box[:, :, 0] * pb_w * 0.1
        out_y = pb_y + box[:, :, 1] * pb_h * 0.1
        out_w = paddle.exp(box[:, :, 2] * 0.2) * pb_w
        out_h = paddle.exp(box[:, :, 3] * 0.2) * pb_h

        if self.is_normalized:
            h = paddle.unsqueeze(
                im_shape[:, 0] / scale_factor[:, 0], axis=-1)
            w = paddle.unsqueeze(
                im_shape[:, 1] / scale_factor[:, 1], axis=-1)
            output = paddle.stack(
                [(out_x - out_w / 2.) * w, (out_y - out_h / 2.) * h,
                 (out_x + out_w / 2.) * w, (out_y + out_h / 2.) * h],
                axis=-1)
        else:
            output = paddle.stack(
                [
                    out_x - out_w / 2., out_y - out_h / 2.,
                    out_x + out_w / 2. - 1., out_y + out_h / 2. - 1.
                ],
                axis=-1)
        outputs.append(output)
    boxes = paddle.concat(outputs, axis=1)

    scores = F.softmax(paddle.concat(scores, axis=1))
    scores = paddle.transpose(scores, [0, 2, 1])

    return boxes, scores
def forward(self, inputs, targets=None, batch_max_length=25):
    batch_size = paddle.shape(inputs)[0]
    num_steps = batch_max_length

    hidden = paddle.zeros((batch_size, self.hidden_size))
    output_hiddens = []

    if targets is not None:
        for i in range(num_steps):
            char_onehots = self._char_to_onehot(
                targets[:, i], onehot_dim=self.num_classes)
            (outputs, hidden), alpha = self.attention_cell(
                hidden, inputs, char_onehots)
            output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
        output = paddle.concat(output_hiddens, axis=1)
        probs = self.generator(output)
    else:
        targets = paddle.zeros(shape=[batch_size], dtype="int32")
        probs = None
        char_onehots = None
        outputs = None
        alpha = None

        for i in range(num_steps):
            char_onehots = self._char_to_onehot(
                targets, onehot_dim=self.num_classes)
            (outputs, hidden), alpha = self.attention_cell(
                hidden, inputs, char_onehots)
            probs_step = self.generator(outputs)
            if probs is None:
                probs = paddle.unsqueeze(probs_step, axis=1)
            else:
                probs = paddle.concat(
                    [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1)
            next_input = probs_step.argmax(axis=1)
            targets = next_input
    if not self.training:
        probs = paddle.nn.functional.softmax(probs, axis=2)
    return probs