def forward(self, x):
    """Mix ``x`` along rows and columns with two predicted matrices.

    ``conv1`` features are max-pooled to ``(s, 1)`` and ``(1, s)``; the
    pooled maps are projected by ``conv_p`` / ``conv_q`` into ``k`` matrices
    of size ``s x s`` each, squashed with a sigmoid and normalised (the left
    matrix along axis 3, the right matrix along axis 2).  Each group matrix
    is repeated for the ``channels // k`` channels of its group, enlarged by
    ``self.resize_mat`` and applied as ``conv2(P @ x @ Q)``.

    Args:
        x: 4-D tensor; ``x.shape[0]`` is used as batch and ``x.shape[1]`` as
            channels.  NOTE(review): both matrices are resized with
            ``x.shape[2] // self.s``, which presumably assumes a square
            spatial map -- confirm.

    Returns:
        The output of ``self.conv2``.
    """
    batch, channels, height = x.shape[0], x.shape[1], x.shape[2]
    groups, size = self.k, self.s

    def _spread_over_channels(mat):
        # One matrix per group -> one copy for every channel of that group.
        mat = paddle.reshape(mat, (batch, groups, 1, size, size))
        mat = paddle.expand(
            mat, (batch, groups, channels // groups, size, size))
        return paddle.reshape(mat, (batch, channels, size, size))

    feat = self.conv1(x)
    row_pooled = F.adaptive_max_pool2d(feat, (size, 1))
    col_pooled = F.adaptive_max_pool2d(feat, (1, size))

    left = F.sigmoid(
        paddle.reshape(self.conv_p(row_pooled), (batch, groups, size, size)))
    right = F.sigmoid(
        paddle.reshape(self.conv_q(col_pooled), (batch, groups, size, size)))
    left = left / paddle.sum(left, axis=3, keepdim=True)
    right = right / paddle.sum(right, axis=2, keepdim=True)

    block = height // size
    left = self.resize_mat(_spread_over_channels(left), block)
    right = self.resize_mat(_spread_over_channels(right), block)

    return self.conv2(paddle.matmul(paddle.matmul(left, x), right))
def _build_volume_2d3(self, feat_l, feat_r, maxdisp, disp, stride=1):
    """Build a residual cost volume from L1 feature distances.

    For each of the ``2 * maxdisp - 1`` integer offsets in
    ``[-maxdisp + 1, maxdisp - 1]`` (scaled by ``stride``), the offset is
    subtracted from ``disp``, ``feat_r`` is warped with the result through
    ``self.warp``, and the L1 norm over axis 1 of ``feat_l - warped`` is
    taken as the cost.

    Args:
        feat_l: left feature map; indexed as 4-D (presumably [B, C, H, W]).
        feat_r: right feature map, expanded with ``feat_l``'s shape.
        maxdisp: half-width of the residual search range.
        disp: current disparity estimate (presumably [B, 1, H, W] -- confirm).
        stride: multiplier applied to every offset.

    Returns:
        Cost tensor reshaped to ``[B, -1, H, W]``.
    """
    size = feat_l.shape
    batch, chans, height, width = size[0], size[-3], size[-2], size[-1]
    num_shifts = maxdisp * 2 - 1

    def _repeat_per_shift(feat):
        # Give every candidate offset its own copy of the feature map.
        stacked = paddle.unsqueeze(feat, axis=1).expand(
            shape=[batch, num_shifts, chans, height, width])
        return stacked.reshape(shape=[-1, chans, height, width])

    disp = paddle.unsqueeze(disp, axis=1)
    shifted_disp = paddle.expand(
        disp,
        shape=[
            disp.shape[0], num_shifts, disp.shape[-3], disp.shape[-2],
            disp.shape[-1]
        ])
    shifted_disp = shifted_disp.reshape(shape=[-1, 1, height, width])

    offsets = paddle.arange(-maxdisp + 1, maxdisp, dtype="float32")
    offsets = paddle.expand(offsets, shape=[batch, offsets.shape[0]])
    offsets = offsets.reshape(shape=[-1]).unsqueeze(axis=[1, 2, 3]) * stride
    shifted_disp = shifted_disp - offsets

    left = _repeat_per_shift(feat_l)
    right = _repeat_per_shift(feat_r)

    # p=1 norm over the channel axis -> residual map per offset.
    cost = paddle.norm(left - self.warp(right, shifted_disp), 1, 1)
    return cost.reshape(shape=[batch, -1, size[2], size[3]])
def _get_rand_mask(self, blocked_query_mask, blocked_key_mask, rand_mask_idx,
                   batch_size, sequence_length):
    """Build the mask for randomly attended blocks.

    According to the original comments: ``rand_mask_idx`` is ``[H, T]``,
    both blocked masks are ``[B, L, bs]`` and the result is
    ``[B, H, L-G, bs, R * bs]`` where ``L = sequence_length // block_size``,
    ``G = num_global_blocks`` and ``R = num_rand_blocks``.

    The key mask is gathered per batch item with ``rand_mask_idx``; the query
    mask is restricted to the non-global blocks; their outer product (via
    ``matmul``) is returned.

    NOTE(review): the query slice is ``[:, GF:-GB]``; if
    ``num_global_blocks_back`` were 0 this slice would be empty -- confirm
    callers never configure that.
    """
    block = self.block_size
    heads = self.num_heads
    num_blocks = sequence_length // block
    inner_blocks = num_blocks - self.num_global_blocks
    front = self.num_global_blocks_front
    back = self.num_global_blocks_back

    key_mask = paddle.expand(
        paddle.unsqueeze(blocked_key_mask, 1),
        [batch_size, heads, num_blocks, -1])
    gathered = []
    for b in range(batch_size):
        gathered.append(paddle.gather_nd(key_mask[b], rand_mask_idx))
    key_mask = paddle.reshape(
        paddle.concat(gathered, 0),
        [batch_size, heads, inner_blocks, 1, self.num_rand_blocks * block])

    query_mask = paddle.unsqueeze(blocked_query_mask[:, front:-back], 1)
    query_mask = paddle.expand(query_mask,
                               [batch_size, heads, inner_blocks, -1])
    query_mask = paddle.reshape(query_mask,
                                [batch_size, heads, inner_blocks, block, 1])

    return paddle.matmul(query_mask, key_mask)
def label2edge(self, label, mask_diag=True):
    """Turn per-sample labels into a pairwise edge tensor.

    Args:
        label: 2-D tensor (presumably [num_tasks, num_samples]); entry
            ``(t, i)`` is the label of sample ``i`` in task ``t``.
        mask_diag: when true, zero the diagonal (self-edges).

    Returns:
        Float tensor with a channel axis inserted at position 1.  Channel 0
        is 1 where two samples share a label (inverted when
        ``self.edge_type == 'dist'``); when ``self.edge_dim == 2`` a
        complementary channel is appended.  With
        ``self.edge_activation == 'softmax'`` every row is divided by its
        sum.
    """
    num_samples = label.shape[1]

    # rows[t, i, j] = label[t, i]; cols is its transpose over (i, j).
    rows = paddle.transpose(
        paddle.expand(label, [num_samples, label.shape[0], label.shape[1]]),
        [1, 2, 0])
    cols = rows.transpose((0, 2, 1))

    edge = paddle.cast(paddle.equal(rows, cols), 'float32').unsqueeze(1)

    if self.edge_type == 'dist':
        edge = 1 - edge
    if self.edge_dim == 2:
        edge = paddle.concat([edge, 1 - edge], 1)

    if mask_diag:
        side = edge.shape[2]
        off_diagonal = 1.0 - paddle.expand(
            paddle.eye(side), [edge.shape[0], self.edge_dim, side, side])
        edge = edge * off_diagonal

    if self.edge_activation == 'softmax':
        edge = edge / edge.sum(-1).unsqueeze(-1)

    return edge
def forward(self, inputs):
    """Run the fixed-shape network and return ``(lstm_output, argmax)``.

    The input is batch-normalised, flattened to ``[1, 3 * 16 * 16]`` and
    passed through ``fc1`` / ``relu1``.  The activation (after ``exp``) and a
    constant-one tensor of the same shape are both expanded to
    ``[1, 768, 768, 2]``, concatenated, passed through ``self.dp`` and
    ``channel_shuffle`` and split in two along axis 1.  The first half feeds
    an arg-max over the flattened last two axes, the second half feeds
    ``self.lstm``.

    Returns:
        Tuple ``(res, max_idx)``.
    """
    feat = self.bn1(inputs)
    feat = paddle.reshape(feat, [1, 3 * 16 * 16])
    feat = self.fc1(feat)
    feat = paddle.fluid.layers.unsqueeze(input=feat, axes=[2])
    feat = self.relu1(feat)

    # Constant companion tensor shaped like the activation.
    ones = paddle.fluid.layers.fill_constant(
        feat.shape, dtype=paddle.float32, value=1)

    feat = paddle.slice(feat, axes=[0], starts=[0], ends=[1])
    feat = paddle.exp(feat)

    ones = paddle.expand(ones, shape=[1, 768, 768, 2])
    feat = paddle.expand(feat, shape=[1, 768, 768, 2])

    out = paddle.concat([feat, ones])
    out = self.dp(out)
    out = channel_shuffle(out, 2)
    out1, out2 = paddle.split(out, num_or_sections=2, axis=1)

    outshape = out1.shape
    flat_shape = (outshape[0], outshape[1], outshape[2] * outshape[3])
    max_idx = paddle.argmax(out1.reshape(flat_shape), axis=-1)
    out2 = out2.reshape(flat_shape)

    res, _ = self.lstm(out2)
    return res, max_idx
def gen_bias(encoder_inputs, decoder_inputs, step):
    """Build the attention bias for decoding over encoder + decoder tokens.

    The result concatenates, along the last axis:

    * an all-ones block over the encoder positions,
    * when ``step > 0``, an all-ones block of width ``step``,
    * a decoder block that is 1 where ``pos_q * (1 / pos_k) >= 1`` with
      1-based positions (i.e. the key is not after the query, up to float
      rounding) and 0 elsewhere.

    Args:
        encoder_inputs: tensor whose first two dims are (batch, enc_len).
        decoder_inputs: tensor whose first two dims are (batch, dec_len).
        step: number of extra always-visible positions between the encoder
            and decoder blocks.

    Returns:
        float32 tensor of shape ``[batch, dec_len, enc_len (+ step) + dec_len]``.
    """
    dec_batch, dec_len = decoder_inputs.shape[:2]
    enc_batch, enc_len = encoder_inputs.shape[:2]

    positions = paddle.reshape(
        paddle.arange(0, dec_len, 1, dtype='float32') + 1, [1, -1, 1])
    visible = paddle.matmul(positions, 1. / positions, transpose_y=True) >= 1.
    decoder_bias = paddle.cast(visible, 'float32')  # [1, dec_len, dec_len]

    encoder_bias = paddle.unsqueeze(
        paddle.cast(paddle.ones_like(encoder_inputs), 'float32'),
        [1])  # [bsz, 1, enc_len]
    encoder_bias = paddle.expand(encoder_bias, [enc_batch, dec_len, enc_len])
    decoder_bias = paddle.expand(decoder_bias, [dec_batch, dec_len, dec_len])

    parts = [encoder_bias]
    if step > 0:
        parts.append(paddle.ones([dec_batch, dec_len, step], 'float32'))
    parts.append(decoder_bias)
    return paddle.concat(parts, -1)
def forward(self, x, y):
    """Score every (x-token, y-token) pair with a biaffine transform.

    Args:
        x: tensor indexed as ``[batch_size, num_tokens, input_size]``.
        y: tensor with the same layout as ``x``.

    Returns:
        ``x @ weight @ y^T`` per output channel, i.e. a tensor of shape
        ``[batch_size, n_out, num_tokens_x, num_tokens_y]`` where
        ``n_out = self.weight.shape[0]``; axis 1 is squeezed away when
        ``n_out == 1``.
    """
    if self.bias_x:
        x = paddle.concat([x, paddle.ones_like(x[:, :, :1])], axis=-1)
    if self.bias_y:
        # The bias column is derived from y itself so that it always matches
        # y's batch size, token count and dtype (it used to be built from x,
        # which only worked when x and y happened to agree on all three).
        y = paddle.concat([y, paddle.ones_like(y[:, :, :1])], axis=-1)

    batch = x.shape[0]
    n_out = self.weight.shape[0]

    # x: (batch_size, n_out, num_tokens, input_size + bias_x)
    x = paddle.expand(
        paddle.unsqueeze(x, axis=1),
        shape=(x.shape[0], n_out, x.shape[1], x.shape[2]))
    # y: (batch_size, n_out, num_tokens, input_size + bias_y)
    y = paddle.expand(
        paddle.unsqueeze(y, axis=1),
        shape=(y.shape[0], n_out, y.shape[1], y.shape[2]))
    # weight: (batch_size, n_out, input_size + bias_x, input_size + bias_y)
    weight = paddle.expand(
        paddle.unsqueeze(self.weight, axis=0),
        shape=(batch, self.weight.shape[0], self.weight.shape[1],
               self.weight.shape[2]))

    # s: (batch_size, n_out, num_tokens, num_tokens)
    s = paddle.matmul(
        paddle.matmul(x, weight), paddle.transpose(y, perm=[0, 1, 3, 2]))

    # Drop the channel axis when there is a single output channel.
    if s.shape[1] == 1:
        s = paddle.squeeze(s, axis=1)
    return s
def warp(self, x, disp):
    """Resample ``x`` at horizontally shifted positions.

    A pixel-coordinate grid is built, ``disp`` is subtracted from its x
    channel, both channels are rescaled to ``[-1, 1]`` and the result is fed
    to ``F.grid_sample``.

    Args:
        x: [B, C, H, W] tensor to sample from.
        disp: horizontal displacement, broadcast against the [B, 1, H, W]
            x-coordinate channel (documented upstream as [B, 1, H, W]).

    Returns:
        Output of ``F.grid_sample(x, grid)``.
    """
    batch, _, height, width = x.shape

    # Pixel-coordinate mesh, one copy per batch item.
    cols = paddle.arange(
        0, width, step=1, dtype='float32').reshape(shape=[1, 1, 1, width])
    rows = paddle.arange(
        0, height, step=1, dtype='float32').reshape(shape=[1, 1, height, 1])
    grid_x = paddle.expand(cols, shape=[batch, 1, height, width])
    grid_y = paddle.expand(rows, shape=[batch, 1, height, width])
    vgrid = paddle.concat((grid_x, grid_y), axis=1)  # [B, 2, H, W]

    # Shift only the x coordinates by the disparity.
    vgrid[:, :1, :, :] = vgrid[:, :1, :, :] - disp

    # Scale grid to [-1, 1]; max(..., 1) guards 1-pixel-wide/high inputs.
    x_scale = max(width - 1, 1)
    y_scale = max(height - 1, 1)
    vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :] / x_scale - 1.0
    vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :] / y_scale - 1.0

    vgrid = paddle.transpose(vgrid, [0, 2, 3, 1])  # [B, H, W, 2]
    vgrid.stop_gradient = False
    return F.grid_sample(x, vgrid)
def expand_v2_tensor(name: str, x, out_shape, use_tensor_in_list):
    """Build, run and save a static-graph ``paddle.expand`` model.

    Args:
        name: model name passed to ``saveModel``.
        x: numpy input fed to the graph as ``'x'``.
        out_shape: target shape (sequence of ints).  It is not modified.
        use_tensor_in_list: if true, the shape is passed as a list whose
            first element is a Tensor; otherwise the whole shape is passed
            as a single int32 Tensor.

    Returns:
        The expanded output fetched from the executor.
    """
    paddle.enable_static()

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        node_x = paddle.static.data(name='x', shape=x.shape, dtype=data_type)
        if use_tensor_in_list:
            # Work on a copy: writing the Tensor into ``out_shape`` itself
            # would silently change the caller's list.
            shape_arg = list(out_shape)
            shape_arg[0] = paddle.assign(
                np.array((shape_arg[0], )).astype('int32'))
            out = paddle.expand(node_x, shape=shape_arg, name='expand_v2')
        else:
            shape_array = np.array(out_shape).astype('int32')
            node_shape = paddle.assign(shape_array, output=None)
            out = paddle.expand(node_x, shape=node_shape, name='expand_v2')

        cpu = paddle.static.cpu_places(1)
        exe = paddle.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(paddle.static.default_startup_program())

        outs = exe.run(feed={'x': x}, fetch_list=[out])

        saveModel(
            name,
            exe,
            feedkeys=['x'],
            fetchlist=[out],
            inputs=[x],
            outputs=[outs[0]],
            target_dir=sys.argv[1])

    return outs[0]
def forward(self, nodes, edges, nums):
    """Update node features from pairwise node/edge features.

    ``nodes`` holds the nodes of several samples back to back; ``nums`` gives
    the node count of each sample.  For every sample all ordered node pairs
    are formed, concatenated with ``edges`` and embedded by ``in_fc``.
    ``coef_fc`` yields attention logits; self-pairs are suppressed with a
    ``-1e9`` offset before the softmax over axis 1.  The attention-weighted
    pair features are projected by ``out_fc`` and added to ``nodes`` in
    place.

    Returns:
        ``[nodes, cat_nodes]`` -- updated nodes and embedded pair features.
    """
    offset, pair_feats = 0, []
    for num in nums:
        sample = nodes[offset:offset + num]
        as_rows = paddle.expand(sample.unsqueeze(1), [-1, num, -1])
        as_cols = paddle.expand(sample.unsqueeze(0), [num, -1, -1])
        pair_feats.append(
            paddle.concat([as_rows, as_cols], -1).reshape([num**2, -1]))
        offset += num

    cat_nodes = paddle.concat([paddle.concat(pair_feats), edges], -1)
    cat_nodes = self.relu(self.in_fc(cat_nodes))
    coefs = self.coef_fc(cat_nodes)

    offset, residuals = 0, []
    for num in nums:
        stop = offset + num**2
        # Large negative logit on the diagonal removes self-attention.
        self_penalty = -paddle.eye(num).unsqueeze(-1) * 1e9
        weights = F.softmax(
            self_penalty + coefs[offset:stop].reshape([num, num, -1]), 1)
        pairs = cat_nodes[offset:stop].reshape([num, num, -1])
        residuals.append((weights * pairs).sum(1))
        offset += num**2

    nodes += self.relu(self.out_fc(paddle.concat(residuals)))
    return [nodes, cat_nodes]
def forward(self, inputs):
    """
    Get SOLOv2MaskHead output.

    Every level is passed through its own ``convs_all_levels`` branch and
    the results are summed.  The last level first gets two extra channels
    holding normalised x / y coordinates in [-1, 1].

    Args:
        inputs(list[Tensor]): feature map from each necks with shape of [N, C, H, W]
    Returns:
        ins_pred(Tensor): Output of SOLOv2MaskHead head
    """
    last_level = self.range_level - 1
    feat_all_level = F.relu(self.convs_all_levels[0](inputs[0]))

    for i in range(1, self.range_level):
        input_p = inputs[i]
        if i == last_level:
            # Dynamic shape so the graph also works for variable input sizes.
            feat_shape = paddle.shape(input_p)
            x_range = paddle.linspace(-1, 1, feat_shape[-1], dtype='float32')
            y_range = paddle.linspace(-1, 1, feat_shape[-2], dtype='float32')
            y, x = paddle.meshgrid([y_range, x_range])
            x = paddle.unsqueeze(x, [0, 1])
            y = paddle.unsqueeze(y, [0, 1])
            y = paddle.expand(y, shape=[feat_shape[0], 1, -1, -1])
            x = paddle.expand(x, shape=[feat_shape[0], 1, -1, -1])
            coord_feat = paddle.concat([x, y], axis=1)
            input_p = paddle.concat([input_p, coord_feat], axis=1)
        feat_all_level = paddle.add(feat_all_level,
                                    self.convs_all_levels[i](input_p))

    return F.relu(self.conv_pred(feat_all_level))
def test_static(self):
    """Check ``paddle.expand`` in static mode with three kinds of shape.

    The target shape is given as a plain list, as a list containing a
    Tensor, and as a fed Tensor; since the target equals the input shape,
    every result must equal the input.
    """
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        x_np = np.random.random([12, 14]).astype("float32")
        x = fluid.layers.data(
            name='x',
            shape=[12, 14],
            append_batch_size=False,
            dtype="float32")

        positive_2 = fluid.layers.fill_constant([1], "int32", 12)
        expand_shape = fluid.layers.data(
            name="expand_shape",
            shape=[2],
            append_batch_size=False,
            dtype="int32")

        out_1 = paddle.expand(x, shape=[12, 14])
        out_2 = paddle.expand(x, shape=[positive_2, 14])
        out_3 = paddle.expand(x, shape=expand_shape)

        # Appends the backward ops for out_2 to the program.
        fluid.backward.calc_gradient(out_2, x)

        exe = fluid.Executor(place=paddle.NPUPlace(0))
        feed = {
            "x": x_np,
            "expand_shape": np.array([12, 14]).astype("int32"),
        }
        res_1, res_2, res_3 = exe.run(fluid.default_main_program(),
                                      feed=feed,
                                      fetch_list=[out_1, out_2, out_3])

        expected = np.tile(x_np, (1, 1))
        for result in (res_1, res_2, res_3):
            assert np.array_equal(result, expected)
def _compute_locations_by_level(self, fpn_stride, feature):
    """
    Compute locations of anchor points of each FPN layer

    Args:
        fpn_stride (int): The stride of current FPN feature map
        feature (Tensor): Tensor of current FPN feature map
    Return:
        Anchor points locations of current FPN feature map: one ``(x, y)``
        row per feature-map cell, offset by half a stride.
    """
    fm_shape = paddle.shape(feature)
    fm_shape.stop_gradient = True
    height, width = fm_shape[2], fm_shape[3]

    xs = paddle.unsqueeze(
        paddle.arange(0, width * fpn_stride, fpn_stride), axis=0)
    ys = paddle.unsqueeze(
        paddle.arange(0, height * fpn_stride, fpn_stride), axis=1)

    xs = paddle.expand(xs, shape=[height, width])
    ys = paddle.expand(ys, shape=[height, width])
    xs.stop_gradient = True
    ys.stop_gradient = True

    flat_x = paddle.reshape(xs, shape=[-1])
    flat_y = paddle.reshape(ys, shape=[-1])

    # Shift from the cell's top-left corner by half a stride.
    location = paddle.stack(
        [flat_x, flat_y], axis=-1) + float(fpn_stride) / 2
    location.stop_gradient = True
    return location
def forward_decoder(self, x, z):
    """Decode from latent ``z`` and return the reconstruction loss.

    Args:
        x: pair ``(data, data_length)``; ``data`` is embedded with
            ``self.x_emb`` and also provides the targets.
        z: latent tensor; it is repeated along the sequence axis and
            concatenated to every embedded step, and also mapped by
            ``self.decoder_lat`` to the initial RNN state.

    Returns:
        Cross-entropy between predictions for steps ``[:-1]`` and targets
        ``data[:, 1:]``, ignoring ``self.pad``.
    """
    data, data_length = x[0], x[1]
    embedding_data = self.x_emb(data)

    # Repeat the latent for every time step of the embedded sequence.
    z_seq = z.unsqueeze(1)
    z_0 = paddle.expand(
        z_seq,
        shape=[z_seq.shape[0], embedding_data.shape[1], z_seq.shape[2]])
    x_input = paddle.concat([embedding_data, z_0], axis=-1)

    # Same initial hidden state for every RNN layer.
    h_first = self.decoder_lat(z).unsqueeze(0)
    h_0 = paddle.expand(
        h_first,
        shape=[
            self.decoder_rnn.num_layers, h_first.shape[1], h_first.shape[2]
        ])

    output, _ = self.decoder_rnn(x_input, h_0, sequence_length=data_length)
    y = self.decoder_fc(output)

    logits = paddle.reshape(y[:, :-1], shape=[-1, y.shape[-1]])
    targets = paddle.reshape(data[:, 1:], shape=[-1])
    return F.cross_entropy(logits, targets, ignore_index=self.pad)
def forward(self, s_emb, q_emb):
    """Refine support/query embeddings with class-prototype attention.

    For every query, the support embeddings plus that query form one row of
    ``n_support + 1`` items.  The mean of the first ``n_support // 2`` items
    and the mean of the next ``n_support // 2`` items are used as two
    prototypes (named negative / positive in the code).  Each item is
    stacked with both prototypes, passed through ``self.attn_layer``, and
    the first slot of the result is concatenated with the original
    embedding.  ``self.mlp_proj`` is applied before (``pre_fc``) or after.

    Args:
        s_emb: support embeddings, indexed as [n_support, dim].
        q_emb: query embeddings, indexed as [n_query, dim].

    Returns:
        Tuple ``(all_emb, None)``.
    """
    if self.pre_fc:
        s_emb = self.mlp_proj(s_emb)
        q_emb = self.mlp_proj(q_emb)

    n_support = s_emb.shape[0]
    n_query = q_emb.shape[0]
    n_shot = int(n_support // 2)

    def _tile_prototype(proto):
        # [n_query, dim] -> [n_query, n_support + 1, dim]
        tiled = paddle.expand(
            proto, [n_support + 1, proto.shape[0], proto.shape[1]])
        return paddle.transpose(tiled, (1, 0, 2))

    support_rep = paddle.expand(
        s_emb, [n_query, s_emb.shape[0], s_emb.shape[1]])
    query_rep = q_emb.unsqueeze(1)
    all_emb = paddle.concat([support_rep, query_rep], 1)
    orig_all_emb = all_emb

    neg_mean = all_emb[:, :n_shot].mean(1)
    pos_mean = all_emb[:, n_shot:2 * n_shot].mean(1)
    neg_proto_emb = _tile_prototype(neg_mean)
    pos_proto_emb = _tile_prototype(pos_mean)

    all_emb = paddle.stack([all_emb, neg_proto_emb, pos_proto_emb], 2)
    q, s, n, d = all_emb.shape
    attended = self.attn_layer(all_emb.reshape((q * s, n, d)))
    attended = attended.reshape((q, s, n, d))

    # Slot 0 is the item itself (slots 1 and 2 are the prototypes).
    all_emb = paddle.concat([attended[:, :, 0], orig_all_emb], axis=-1)

    if not self.pre_fc:
        all_emb = self.mlp_proj(all_emb)
    return all_emb, None
def forward(self, input, target):
    """Batch-hard triplet loss on pairwise Euclidean distances.

    Args:
        input: dict; ``input["features"]`` is the feature matrix with shape
            (batch_size, feat_dim).
        target: ground truth labels, expanded to (batch_size, batch_size)
            (presumably one label per sample -- confirm).

    Returns:
        ``{"TripletLossV2": loss}`` where ``loss`` is
        ``self.ranking_loss(dist_an, dist_ap, ones)`` with the hardest
        positive / negative distance of every anchor.

    NOTE(review): reshaping the masked distances to ``(bs, -1)`` only works
    when every anchor has the same number of positives (and negatives).
    """
    inputs = input["features"]

    if self.normalize_feature:
        l2_norm = paddle.norm(inputs, p=2, axis=-1, keepdim=True)
        inputs = 1. * inputs / (paddle.expand_as(l2_norm, inputs) + 1e-12)

    bs = inputs.shape[0]

    # Pairwise distance: |a|^2 + |b|^2 - 2 a.b, clipped before the sqrt.
    squared = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = squared + squared.t()
    dist = paddle.addmm(
        input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()

    # Hard example mining masks.
    label_grid = paddle.expand(target, (bs, bs))
    is_pos = label_grid.equal(label_grid.t())
    is_neg = label_grid.not_equal(label_grid.t())

    # `dist_ap`: largest distance(anchor, positive), shape [N, 1].
    dist_ap = paddle.max(
        paddle.reshape(paddle.masked_select(dist, is_pos), (bs, -1)),
        axis=1,
        keepdim=True)
    # `dist_an`: smallest distance(anchor, negative), shape [N, 1].
    dist_an = paddle.min(
        paddle.reshape(paddle.masked_select(dist, is_neg), (bs, -1)),
        axis=1,
        keepdim=True)

    # shape [N]
    dist_ap = paddle.squeeze(dist_ap, axis=1)
    dist_an = paddle.squeeze(dist_an, axis=1)

    # Compute ranking hinge loss
    y = paddle.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return {"TripletLossV2": loss}
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the bbox from the output of NMS to get final
    prediction.

    Notes:
        Currently only support bs = 1.

    Args:
        bboxes (Tensor): The output bboxes with shape [N, 6] after decode
            and NMS, including labels, scores and bboxes.
        bbox_num (Tensor): The number of prediction boxes of each batch with
            shape [1], and is N.
        im_shape (Tensor): The shape of the input image.
        scale_factor (Tensor): The scale factor of the input image,
            ordered (scale_y, scale_x).
    Returns:
        pred_result (Tensor): The final prediction results with shape [N, 6]
            including labels, scores and bboxes.  Boxes flagged as empty by
            ``nonempty_bbox`` get label -1.

    Side effect: stores the per-box original image shape in
    ``self.origin_shape_list``.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows, scale_rows = [], []
    for i in range(bbox_num.shape[0]):
        count = bbox_num[i]
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [count, 2]))
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        scale_rows.append(paddle.expand(scale, [count, 4]))

    self.origin_shape_list = paddle.concat(shape_rows)
    scale_factor_list = paddle.concat(scale_rows)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]

    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:] / scale_factor_list
    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)

    def _clip(values, upper):
        # clip to [0, original_size]
        return paddle.maximum(paddle.minimum(values, upper), zeros)

    x1 = _clip(scaled_bbox[:, 0], origin_w)
    y1 = _clip(scaled_bbox[:, 1], origin_h)
    x2 = _clip(scaled_bbox[:, 2], origin_w)
    y2 = _clip(scaled_bbox[:, 3], origin_h)
    pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    return paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale and clip the 4-point boxes from the output of NMS to get the
    final prediction.

    Args:
        bboxes(Tensor): bboxes [N, 10] -- label, score and 8 coordinates
            (x1, y1, ..., x4, y4)
        bbox_num(Tensor): bbox_num
        im_shape(Tensor): [1 2]
        scale_factor(Tensor): [1 2], ordered (scale_y, scale_x)
    Returns:
        bbox_pred(Tensor): label, score and the 8 coordinates, rescaled to
            the original image and clipped to ``[0, size - 1]``.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows, scale_rows = [], []
    for i in range(bbox_num.shape[0]):
        count = bbox_num[i]
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [count, 2]))
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        # (x, y) scale repeated for each of the four corner points.
        scale = paddle.concat([
            scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
            scale_y
        ])
        scale_rows.append(paddle.expand(scale, [count, 8]))

    origin_shape_list = paddle.concat(shape_rows)
    scale_factor_list = paddle.concat(scale_rows)

    # bboxes: [N, 10], label, score, bbox
    pred_label_score = bboxes[:, 0:2]

    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:].reshape([-1, 8]) / scale_factor_list

    origin_h = origin_shape_list[:, 0]
    origin_w = origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)

    # Even columns are x coordinates (bounded by width), odd ones are y.
    bounds = (origin_w, origin_h)
    clipped = []
    for col in range(8):
        upper = bounds[col % 2] - 1
        clipped.append(
            paddle.maximum(paddle.minimum(scaled_bbox[:, col], upper), zeros))

    pred_bbox = paddle.stack(clipped, axis=-1)
    return paddle.concat([pred_label_score, pred_bbox], axis=1)
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the bbox from the output of NMS to get final
    prediction.

    Args:
        bboxes(Tensor): The output of __call__ with shape [N, 6]
        bbox_num(Tensor): number of boxes per image.
        im_shape(Tensor): shape of the network input image.
        scale_factor(Tensor): resize factors, ordered (scale_y, scale_x).
    Returns:
        bbox_pred(Tensor): The output is the prediction with shape [N, 6]
                           including labels, scores and bboxes. The size of
                           bboxes are corresponding to the original image.
                           Boxes flagged as empty by ``nonempty_bbox`` get
                           label -1.

    Side effect: stores the per-box original image shape in
    ``self.origin_shape_list``.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows, scale_rows = [], []
    for i in range(bbox_num.shape[0]):
        count = bbox_num[i]
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [count, 2]))
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        per_box_scale = paddle.expand(scale, [count, 4])
        # TODO: Because paddle.expand transform error when dygraph
        # to static, use reshape to avoid mistakes.
        per_box_scale = paddle.reshape(per_box_scale, [count, 4])
        scale_rows.append(per_box_scale)

    self.origin_shape_list = paddle.concat(shape_rows)
    scale_factor_list = paddle.concat(scale_rows)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]

    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:] / scale_factor_list
    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)

    def _clip(values, upper):
        # clip to [0, original_size]
        return paddle.maximum(paddle.minimum(values, upper), zeros)

    x1 = _clip(scaled_bbox[:, 0], origin_w)
    y1 = _clip(scaled_bbox[:, 1], origin_h)
    x2 = _clip(scaled_bbox[:, 2], origin_w)
    y2 = _clip(scaled_bbox[:, 3], origin_h)
    pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    return paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
def calc_dist_matrix(x, y):
    """Calculate Euclidean distance matrix with paddle.Tensor.

    Args:
        x: tensor of shape [n, d].
        y: tensor of shape [m, d].

    Returns:
        Tensor of shape [n, m]; entry (i, j) is ``||x[i] - y[j]||_2``.
    """
    n, d = x.shape[0], x.shape[1]
    m = y.shape[0]

    # Line both inputs up on a common [n, m, d] grid.
    x_grid = paddle.expand(x.unsqueeze(1), [n, m, d])
    y_grid = paddle.expand(y.unsqueeze(0), [n, m, d])

    return paddle.sqrt(paddle.pow(x_grid - y_grid, 2).sum(2))
def forward(self, r):
    """Expand distances ``r`` into ``K`` radial basis values with a cutoff.

    Computes ``phi(r) * exp(-beta * (exp(-r) - mu) ** 2)`` where
    ``phi(r) = 1 - 6 (r/cut_r)^5 + 15 (r/cut_r)^4 - 10 (r/cut_r)^3``.

    Args:
        r: distance tensor; ``r.size`` is used as the number of rows, so it
            must be expandable to ``[r.size, K]`` (presumably a column
            vector -- confirm).

    Returns:
        Tensor of shape ``[r.size, K]``.
    """
    rows = r.size
    width = self.K
    target = [rows, width]

    scaled = r / self.cut_r
    fifth, fourth, third = scaled.pow(5), scaled.pow(4), scaled.pow(3)
    cutoff = 1 - 6 * fifth + 15 * fourth - 10 * third
    cutoff = paddle.expand(cutoff, shape=target)

    local_r = paddle.expand(r, shape=target)
    exponent = -self.beta.expand(target) * (
        paddle.exp(-local_r) - self.mu.expand(target))**2
    return cutoff * paddle.exp(exponent)
def get_prediction(self, bbox_head_out, rois):
    """Combine the outputs of one or several box-head stages.

    Args:
        bbox_head_out: list with one ``(score, delta)`` pair per stage.
        rois: for a single stage, a ``(proposal, proposal_num)`` pair; for
            several stages, one such pair per stage.

    Returns:
        ``(bbox_pred, bboxes)`` where ``bbox_pred = (delta, bbox_prob)`` and
        ``bboxes = (proposal, proposal_num)``.  With several stages, the
        class probabilities are averaged over the stages listed in
        ``self.score_stage`` and deltas / proposals over the stages listed
        in ``self.delta_stage``; ``proposal_num`` is taken from the last
        stage.

    NOTE(review): stage indices are assumed to be 0-based in
    ``score_stage`` / ``delta_stage`` -- confirm against the config.  The
    single-stage branch reshapes with ``self.delta_dim`` while the
    multi-stage branch uses ``self.out_dim``; confirm both attributes exist.
    """
    if len(bbox_head_out) == 1:
        proposal, proposal_num = rois
        score, delta = bbox_head_out[0]
        bbox_prob = F.softmax(score)
        delta = paddle.reshape(delta, (-1, self.delta_dim, 4))
    else:
        proposal_list = []
        prob_list = []
        delta_list = []
        # enumerate() supplies the stage index.  Without it, ``stage`` was
        # bound to the roi entry and the head output was mis-unpacked as
        # ``(proposals, bboxhead)``.
        for stage, (proposals, bboxhead) in enumerate(
                zip(rois, bbox_head_out)):
            score, delta = bboxhead
            proposal, proposal_num = proposals
            if stage in self.score_stage:
                prob_list.append(F.softmax(score))
            if stage in self.delta_stage:
                proposal_list.append(proposal)
                delta_list.append(delta)
        bbox_prob = paddle.mean(paddle.stack(prob_list), axis=0)
        delta = paddle.mean(paddle.stack(delta_list), axis=0)
        proposal = paddle.mean(paddle.stack(proposal_list), axis=0)
        delta = paddle.reshape(delta, (-1, self.out_dim, 4))

    if self.cls_agnostic:
        # Reuse the single foreground delta (index 1) for every class.
        N, _, M = delta.shape
        delta = delta[:, 1:2, :]
        delta = paddle.expand(delta, [N, self.num_classes, M])

    bboxes = (proposal, proposal_num)
    bbox_pred = (delta, bbox_prob)
    return bbox_pred, bboxes
def forward(self, node_feat, edge_feat):
    """Aggregate neighbour features along the edges and transform them.

    Args:
        node_feat: node features, indexed as [num_tasks, num_data, dim].
        edge_feat: edge features; multiplied by a
            [num_tasks, edge_dim, num_data, num_data] diagonal mask.

    Returns:
        Updated node features produced by ``self.network`` (transposed back
        so that the node axis comes before the feature axis).
    """
    num_tasks = node_feat.shape[0]
    num_data = node_feat.shape[1]

    # Zero the diagonal (no self-edges) and L1-normalise every row.
    off_diagonal = 1.0 - paddle.expand(
        paddle.eye(num_data), [num_tasks, self.edge_dim, num_data, num_data])
    edge_feat = F.normalize(edge_feat * off_diagonal, p=1, axis=-1)

    # Stack the two edge channels so a single bmm aggregates both.
    stacked_edges = paddle.concat(
        paddle.split(edge_feat, 2, 1), self.edge_dim).squeeze(1)
    aggr_feat = paddle.bmm(stacked_edges, node_feat)

    aggregated = paddle.concat(paddle.split(aggr_feat, 2, 1), -1)
    node_feat = paddle.transpose(
        paddle.concat([node_feat, aggregated], -1), (0, 2, 1))

    # Non-linear transform.
    transformed = self.network(node_feat.unsqueeze(-1))
    return paddle.transpose(transformed, (0, 2, 1, 3)).squeeze(-1)
def _encoder_forward(self, src, src_mask=[None, None]): output = src if src_mask[1] is not None: head_mask = src_mask[1] if len(head_mask.shape) == 1: head_mask = paddle.unsqueeze( paddle.unsqueeze( paddle.unsqueeze(paddle.unsqueeze(head_mask, 0), 0), -1), -1) head_mask = paddle.expand(head_mask, shape=[self.num_layers] + head_mask.shape[1:]) elif len(head_mask.shape) == 2: head_mask = paddle.unsqueeze( paddle.unsqueeze(paddle.unsqueeze(head_mask, 1), -1), -1) else: head_mask = [None] * self.num_layers for i, mod in enumerate(self.layers): output = mod(output, src_mask=[src_mask[0], head_mask[i]]) if self.norm is not None: output = self.norm(output) return output
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
    """CTC loss over character features sampled along text center lines.

    ``f_char`` is moved to channel-last layout, sampled at ``tcl_pos`` with
    ``gather_nd`` and reshaped to ``[-1, 64, 37]`` (36 foreground classes
    plus one background class).  Where ``tcl_mask`` is 0, foreground logits
    are forced to -20 and the background logit to +20.

    Args:
        f_char: character feature map (presumably [B, 37, H, W] -- confirm).
        tcl_pos: sampling indices, reshaped to rows of 3.
        tcl_mask: 3-D validity mask for the sampled points.
        tcl_label: CTC labels.
        label_t: label lengths.

    Returns:
        Mean of the per-sequence ``ctc_loss`` with ``blank=self.pad_num``.
    """
    f_char = paddle.transpose(f_char, [0, 2, 3, 1])
    tcl_pos = paddle.cast(paddle.reshape(tcl_pos, [-1, 3]), dtype=int)

    sampled = paddle.gather_nd(f_char, tcl_pos)
    sampled = paddle.reshape(sampled, [-1, 64, 37])  # len(Lexicon_Table)+1
    fg_logits, bg_logits = paddle.split(sampled, [36, 1], axis=2)

    bg_logits = bg_logits * tcl_mask + (1.0 - tcl_mask) * 20.0

    b, c, l = tcl_mask.shape
    fg_mask = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
    fg_mask.stop_gradient = True
    fg_logits = fg_logits * fg_mask + (1.0 - fg_mask) * (-20.0)

    masked = paddle.concat([fg_logits, bg_logits], axis=2)
    time_major = paddle.transpose(masked, (1, 0, 2))

    N, B, _ = time_major.shape
    input_lengths = paddle.to_tensor([N] * B, dtype='int64')
    cost = paddle.nn.functional.ctc_loss(
        log_probs=time_major,
        labels=tcl_label,
        input_lengths=input_lengths,
        label_lengths=label_t,
        blank=self.pad_num,
        reduction='none')
    return cost.mean()
def forward(self, r):
    """Expand distances ``r`` into ``K`` Gaussian radial basis values.

    Computes ``exp(-beta * (r - mu) ** 2)`` with ``r``, ``self.beta`` and
    ``self.mu`` all expanded to ``[r.size, K]``.

    Args:
        r: distance tensor expandable to ``[r.size, K]`` (presumably a
            column vector -- confirm).

    Returns:
        Tensor of shape ``[r.size, K]``.
    """
    target = [r.size, self.K]
    local_r = paddle.expand(r, shape=target)
    exponent = -self.beta.expand(target) * (
        local_r - self.mu.expand(target))**2
    return paddle.exp(exponent)
def loss(self, embeds):
    """
    Computes the softmax loss according the section 2.1 of GE2E.

    :param embeds: the embeddings as a tensor of shape (speakers_per_batch,
    utterances_per_speaker, embedding_size)
    :return: the loss and the EER for this batch of embeddings.
    """
    speakers_per_batch, utterances_per_speaker = embeds.shape[:2]

    # Loss
    sim_matrix, *_ = self.similarity_matrix(embeds)
    sim_matrix = sim_matrix.reshape(
        [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
    # Utterance rows are speaker-major, so the target is each speaker index
    # repeated ``utterances_per_speaker`` times.
    target = paddle.arange(
        0, speakers_per_batch, dtype="int64").unsqueeze(-1)
    target = paddle.expand(target,
                           [speakers_per_batch, utterances_per_speaker])
    target = paddle.reshape(target, [-1])
    loss = nn.CrossEntropyLoss()(sim_matrix, target)

    # EER (not backpropagated)
    with paddle.no_grad():
        ground_truth = target.numpy()
        # One-hot rows via identity-matrix lookup.  Uses the builtin ``int``
        # dtype: the ``np.int`` alias was removed in NumPy 1.24.
        labels = np.eye(speakers_per_batch, dtype=int)[ground_truth]
        preds = sim_matrix.numpy()

        # Snippet from https://yangcha.github.io/EER-ROC/
        fpr, tpr, _ = roc_curve(labels.flatten(), preds.flatten())
        eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

    return loss, eer
def cal_adj_acc(self, pred_eval, model):
    """Accuracy of predicted adjacency matrices against label edges.

    Args:
        pred_eval: dict with ``'sup_labels'`` (itself a dict with
            ``'support'`` and ``'query'`` labels) and ``'adj'`` (one
            predicted adjacency per query batch).
        model: object exposing ``model.layers.label2edge``.

    Returns:
        Fraction of adjacency entries that, after thresholding at 0.5,
        equal the edges derived from the labels.
    """
    labels = pred_eval['sup_labels']
    adj_list = pred_eval['adj']

    cnt_sum, cnt_correct = 0, 0
    for ii in range(len(adj_list)):
        pred_edge = adj_list[ii]
        support = labels['support']
        query = labels['query'][ii]

        # One row per query: all support labels followed by the query label.
        n_query = query.shape[0]
        tiled_support = paddle.expand(support, [n_query, support.shape[0]])
        total_label = paddle.concat([tiled_support, query.unsqueeze(1)], 1)
        label_edge = model.layers.label2edge(total_label)

        # Binarise the prediction at 0.5.
        pred_edge = paddle.where(pred_edge >= 0.5,
                                 paddle.ones_like(pred_edge), pred_edge)
        pred_edge = paddle.where(pred_edge < 0.5,
                                 paddle.zeros_like(pred_edge), pred_edge)

        matches = paddle.cast(pred_edge == label_edge, dtype='float32').sum()
        mismatches = paddle.cast(
            pred_edge != label_edge, dtype='float32').sum()
        cnt_sum += (matches + mismatches)
        cnt_correct += matches

    return cnt_correct / cnt_sum
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
    """CTC loss (``warpctc``) over features sampled along text center lines.

    ``f_char`` is moved to channel-last layout, sampled at ``tcl_pos`` with
    ``gather_nd`` and reshaped to ``[-1, 64, 37]`` (36 foreground classes
    plus one background class).  Where ``tcl_mask`` is 0, foreground logits
    are forced to -20 and the background logit to +20.

    Args:
        f_char: character feature map (presumably [B, 37, H, W] -- confirm).
        tcl_pos: sampling indices, reshaped to rows of 3.
        tcl_mask: 3-D validity mask for the sampled points.
        tcl_label: CTC labels.
        label_t: label lengths.

    Returns:
        Mean of the ``warpctc`` losses (blank index ``self.pad_num``).
    """
    f_char = paddle.transpose(f_char, [0, 2, 3, 1])
    tcl_pos = paddle.cast(paddle.reshape(tcl_pos, [-1, 3]), dtype=int)

    sampled = paddle.gather_nd(f_char, tcl_pos)
    sampled = paddle.reshape(sampled, [-1, 64, 37])  # len(Lexicon_Table)+1
    fg_logits, bg_logits = paddle.split(sampled, [36, 1], axis=2)

    bg_logits = bg_logits * tcl_mask + (1.0 - tcl_mask) * 20.0

    b, c, l = tcl_mask.shape
    fg_mask = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
    fg_mask.stop_gradient = True
    fg_logits = fg_logits * fg_mask + (1.0 - fg_mask) * (-20.0)

    masked = paddle.concat([fg_logits, bg_logits], axis=2)
    time_major = paddle.transpose(masked, (1, 0, 2))

    N, B, _ = time_major.shape
    input_lengths = paddle.to_tensor([N] * B, dtype='int64')
    loss_out = paddle.fluid.layers.warpctc(time_major, tcl_label,
                                           self.pad_num, True, input_lengths,
                                           label_t)
    cost = paddle.fluid.layers.squeeze(loss_out, [-1])
    return cost.mean()
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode box deltas and clip the boxes to the image.

    Args:
        bbox_head_out: pair ``(bbox_pred, cls_prob)`` from the box head.
        rois: pair ``(roi, rois_num)``; ``roi`` holds one tensor of RoIs
            per image and ``rois_num`` the RoI count per image.
        im_shape: per-image shape tensor, indexed as ``im_shape[idx, :]``
            with (height, width) columns.
        scale_factor: accepted for interface compatibility; not used here.

    Returns:
        ``((bbox, rois_num), scores)`` where ``bbox`` has shape
        ``[N, num_classes, 4]`` clipped to ``[0, im_shape]`` and ``scores``
        is ``cls_prob`` without its last column.

    NOTE(review): boxes are clipped against ``im_shape`` itself (presumably
    because they are still in network-input coordinates at this point --
    confirm).  The previous ``floor(im_shape / scale_factor + 0.5)`` result
    was overwritten before ever being read, so it has been removed together
    with the unused ``scale_list``.
    """
    bbox_pred, cls_prob = bbox_head_out
    roi, rois_num = rois

    # One (h, w) row per RoI so it lines up with the concatenated boxes.
    shape_per_roi = [
        paddle.expand(im_shape[idx, :], [rois_num[idx], 2])
        for idx, _ in enumerate(roi)
    ]
    clip_shape = paddle.concat(shape_per_roi)

    # [N, C*4]
    bbox = paddle.concat(roi)
    bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
    # Drop the last score column.
    scores = cls_prob[:, :-1]

    # [N*C, 4]
    bbox_num_class = bbox.shape[1] // 4
    bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])

    origin_h = paddle.unsqueeze(clip_shape[:, 0], axis=1)
    origin_w = paddle.unsqueeze(clip_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(origin_h)

    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    bboxes = (bbox, rois_num)
    return bboxes, scores