def forward(self, inputs):
    text = inputs[0]
    pos_tag = inputs[1]
    neg_tag = inputs[2]

    text_emb = self.text_embedding(text)
    text_emb = paddle.reshape(
        text_emb, shape=[-1, self.text_len, self.emb_dim])
    pos_tag_emb = self.tag_embedding(pos_tag)
    pos_tag_emb = paddle.reshape(pos_tag_emb, shape=[-1, self.emb_dim])
    neg_tag_emb = self.tag_embedding(neg_tag)
    neg_tag_emb = paddle.reshape(
        neg_tag_emb, shape=[-1, self.neg_size, self.emb_dim])

    conv_1d = self.conv(text_emb)
    act = paddle.tanh(conv_1d)
    maxpool = paddle.max(act, axis=1)
    maxpool = paddle.reshape(maxpool, shape=[-1, self.hid_dim])
    text_hid = self.hid_fc(maxpool)

    cos_pos = F.cosine_similarity(
        pos_tag_emb, text_hid, axis=1).reshape([-1, 1])
    # fluid.layers.Print(cos_pos)

    neg_tag_emb = paddle.max(neg_tag_emb, axis=1)
    neg_tag_emb = paddle.reshape(neg_tag_emb, shape=[-1, self.emb_dim])
    cos_neg = F.cosine_similarity(
        neg_tag_emb, text_hid, axis=1).reshape([-1, 1])
    # fluid.layers.Print(cos_neg)

    return cos_pos, cos_neg
def test_1(self):
    # type: float
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data = fluid.data("data", shape=[10, 10], dtype="float32")
        result_max = paddle.max(input=data, dim=1)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        input_data = np.random.rand(10, 10).astype(np.float32)
        res, = exe.run(feed={"data": input_data}, fetch_list=[result_max])
    self.assertEqual((res == np.max(input_data, axis=1)).all(), True)

    # type: int
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data = fluid.data("data", shape=[10, 10], dtype="int64")
        result_max = paddle.max(input=data, dim=1)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        input_data = np.random.randint(10, size=(10, 10)).astype(np.int64)
        res, = exe.run(feed={"data": input_data}, fetch_list=[result_max])
    self.assertEqual((res == np.max(input_data, axis=1)).all(), True)

    # dygraph
    with fluid.dygraph.guard():
        np_x = np.array([10, 10]).astype('float64')
        x = fluid.dygraph.to_variable(np_x)
        z = paddle.max(x, dim=0)
        np_z = z.numpy()
        z_expected = np.array(np.max(np_x, axis=0))
    self.assertEqual((np_z == z_expected).all(), True)
def test_api(self):
    paddle.enable_static()
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data = paddle.static.data("data", shape=[10, 10], dtype="float32")
        result_max = paddle.max(x=data, axis=1)
        exe = paddle.static.Executor(self.place)
        input_data = np.random.rand(10, 10).astype(np.float32)
        res, = exe.run(feed={"data": input_data}, fetch_list=[result_max])
    self.assertEqual((res == np.max(input_data, axis=1)).all(), True)

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data = paddle.static.data("data", shape=[10, 10], dtype="int64")
        result_max = paddle.max(x=data, axis=0)
        exe = paddle.static.Executor(self.place)
        input_data = np.random.randint(10, size=(10, 10)).astype(np.int64)
        res, = exe.run(feed={"data": input_data}, fetch_list=[result_max])
    self.assertEqual((res == np.max(input_data, axis=0)).all(), True)

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data = paddle.static.data("data", shape=[10, 10], dtype="int64")
        result_max = paddle.max(x=data, axis=(0, 1))
        exe = paddle.static.Executor(self.place)
        input_data = np.random.randint(10, size=(10, 10)).astype(np.int64)
        res, = exe.run(feed={"data": input_data}, fetch_list=[result_max])
    self.assertEqual((res == np.max(input_data, axis=(0, 1))).all(), True)
def kl_divergence(self, other):
    """The KL-divergence between two Categorical distributions.

    Args:
        other (Categorical): instance of Categorical. The data type is float32.

    Returns:
        Tensor: kl-divergence between two Categorical distributions.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            paddle.seed(200) # on CPU device
            y = paddle.rand([6])
            print(y)
            # [0.77663314 0.90824795 0.15685187
            #  0.04279523 0.34468332 0.7955718 ]

            cat = Categorical(x)
            cat2 = Categorical(y)

            cat.kl_divergence(cat2)
            # [0.071952]

    """
    name = self.name + '_kl_divergence'
    if not _non_static_mode():
        check_type(other, 'other', Categorical, 'kl_divergence')

    logits = self.logits - \
        paddle.max(self.logits, axis=-1, keepdim=True)
    other_logits = other.logits - paddle.max(
        other.logits, axis=-1, keepdim=True)
    e_logits = ops.exp(logits)
    other_e_logits = ops.exp(other_logits)
    z = paddle.sum(e_logits, axis=-1, keepdim=True)
    other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
    prob = e_logits / z
    kl = paddle.sum(
        prob * (logits - paddle.log(z) - other_logits + paddle.log(other_z)),
        axis=-1,
        keepdim=True,
        name=name)

    return kl
def compute_violation_metrics(
        batch: Dict[str, paddle.Tensor],
        atom14_pred_positions: paddle.Tensor,  # (B, N, 14, 3)
        violations: Dict[str, paddle.Tensor]) -> Dict[str, paddle.Tensor]:
    """Compute several metrics to assess the structural violations."""
    batch_size = atom14_pred_positions.shape[0]
    ret = {}
    extreme_ca_ca_violations = all_atom.extreme_ca_ca_distance_violations(
        pred_atom_positions=atom14_pred_positions,
        pred_atom_mask=paddle.cast(batch['atom14_atom_exists'], 'float32'),
        residue_index=paddle.cast(batch['residue_index'], 'float32'))
    ret['violations_extreme_ca_ca_distance'] = extreme_ca_ca_violations

    violations_between_residue_bond_tmp = []
    for i in range(batch_size):
        violations_between_residue_bond_i = utils.mask_mean(
            mask=batch['seq_mask'][i],
            value=violations['between_residues'][
                'connections_per_residue_violation_mask'][i])
        violations_between_residue_bond_tmp.append(
            violations_between_residue_bond_i)
    violations_between_residue_bond = paddle.to_tensor(
        violations_between_residue_bond_tmp, stop_gradient=False)
    violations_between_residue_bond = paddle.squeeze(
        violations_between_residue_bond, axis=-1)
    ret['violations_between_residue_bond'] = violations_between_residue_bond

    violations_between_residue_clash_tmp = []
    for i in range(batch_size):
        violations_between_residue_clash_i = utils.mask_mean(
            mask=batch['seq_mask'][i],
            value=paddle.max(
                violations['between_residues']['clashes_per_atom_clash_mask'],
                axis=-1)[i])
        violations_between_residue_clash_tmp.append(
            violations_between_residue_clash_i)
    violations_between_residue_clash = paddle.to_tensor(
        violations_between_residue_clash_tmp, stop_gradient=False)
    violations_between_residue_clash = paddle.squeeze(
        violations_between_residue_clash, axis=-1)
    ret['violations_between_residue_clash'] = violations_between_residue_clash

    violations_within_residue_tmp = []
    for i in range(batch_size):
        violations_within_residue_i = utils.mask_mean(
            mask=batch['seq_mask'][i],
            value=paddle.max(
                violations['within_residues']['per_atom_violations'],
                axis=-1)[i])
        violations_within_residue_tmp.append(violations_within_residue_i)
    violations_within_residue = paddle.to_tensor(
        violations_within_residue_tmp, dtype='float32', stop_gradient=False)
    violations_within_residue = paddle.squeeze(
        violations_within_residue, axis=-1)
    ret['violations_within_residue'] = violations_within_residue

    violations_per_residue_tmp = []
    for i in range(batch_size):
        violations_per_residue_i = utils.mask_mean(
            mask=batch['seq_mask'][i],
            value=violations['total_per_residue_violations_mask'][i])
        violations_per_residue_tmp.append(violations_per_residue_i)
    violations_per_residue = paddle.to_tensor(
        violations_per_residue_tmp, dtype='float32', stop_gradient=False)
    violations_per_residue = paddle.squeeze(violations_per_residue, axis=-1)
    ret['violations_per_residue'] = violations_per_residue
    return ret
def test_big_dimension(self):
    paddle.disable_static()
    x = paddle.rand(shape=[2, 2, 2, 2, 2, 2, 2])
    np_x = x.numpy()
    z1 = paddle.max(x, axis=-1)
    z2 = paddle.max(x, axis=6)
    np_z1 = z1.numpy()
    np_z2 = z2.numpy()
    z_expected = np.array(np.max(np_x, axis=6))
    self.assertEqual((np_z1 == z_expected).all(), True)
    self.assertEqual((np_z2 == z_expected).all(), True)
def relprop(self, R, alpha):
    if self.X.shape[1] == 3:
        pw = paddle.clip(self.weight, min=0)
        nw = paddle.clip(self.weight, max=0)
        X = self.X
        # print(X.shape)  # [1, 3, 224, 224]
        L = self.X * 0 + \
            paddle.min(paddle.min(paddle.min(self.X, axis=1, keepdim=True),
                                  axis=2, keepdim=True),
                       axis=3, keepdim=True)
        H = self.X * 0 + \
            paddle.max(paddle.max(paddle.max(self.X, axis=1, keepdim=True),
                                  axis=2, keepdim=True),
                       axis=3, keepdim=True)
        Za = F.conv2d(X, self.weight, bias=None, stride=self._stride,
                      padding=self._padding) - \
            F.conv2d(L, pw, bias=None, stride=self._stride,
                     padding=self._padding) - \
            F.conv2d(H, nw, bias=None, stride=self._stride,
                     padding=self._padding) + 1e-9

        S = R / Za
        C = X * self.gradprop2(S, self.weight) - L * \
            self.gradprop2(S, pw) - H * self.gradprop2(S, nw)
        R = C
    else:
        beta = alpha - 1
        pw = paddle.clip(self.weight, min=0)
        nw = paddle.clip(self.weight, max=0)
        px = paddle.clip(self.X, min=0)
        nx = paddle.clip(self.X, max=0)

        def f(w1, w2, x1, x2):
            Z1 = F.conv2d(x1, w1, bias=None, stride=self._stride,
                          padding=self._padding)
            Z2 = F.conv2d(x2, w2, bias=None, stride=self._stride,
                          padding=self._padding)
            S1 = safe_divide(R, Z1)
            S2 = safe_divide(R, Z2)
            C1 = x1 * self.gradprop(Z1, x1, S1)[0]
            C2 = x2 * self.gradprop(Z2, x2, S2)[0]
            return C1 + C2

        activator_relevances = f(pw, nw, px, nx)
        inhibitor_relevances = f(nw, pw, px, nx)

        R = alpha * activator_relevances - beta * inhibitor_relevances
    return R
def forward(self, inputs, is_infer=False):
    self.q_slots = inputs[0]
    self.pt_slots = inputs[1]
    if not is_infer:
        self.nt_slots = inputs[2]

    q_embs = [self.embedding(query) for query in self.q_slots]
    q_encodes = []
    for emb in q_embs:
        emb = paddle.reshape(
            emb, shape=[-1, self.query_len, self.query_encode_dim])
        gru = self.gru(emb)
        maxpool = paddle.max(gru[0], axis=1)
        maxpool = paddle.reshape(maxpool, shape=[-1, self.query_encode_dim])
        q_encodes.append(maxpool)
    q_concat = paddle.concat(q_encodes, axis=1)
    q_hid = self.q_fc(q_concat)

    pt_embs = [self.embedding(title) for title in self.pt_slots]
    pt_encodes = []
    for emb in pt_embs:
        emb = paddle.reshape(
            emb, shape=[-1, self.pos_len, self.title_encode_dim])
        gru = self.gru(emb)
        maxpool = paddle.max(gru[0], axis=1)
        maxpool = paddle.reshape(maxpool, shape=[-1, self.title_encode_dim])
        pt_encodes.append(maxpool)
    pt_concat = paddle.concat(pt_encodes, axis=1)
    pt_hid = self.t_fc(pt_concat)
    cos_pos = F.cosine_similarity(q_hid, pt_hid, axis=1).reshape([-1, 1])

    if is_infer:
        return cos_pos, paddle.ones(shape=[1, 1])

    nt_embs = [self.embedding(title) for title in self.nt_slots]
    nt_encodes = []
    for emb in nt_embs:
        emb = paddle.reshape(
            emb, shape=[-1, self.neg_len, self.title_encode_dim])
        gru = self.gru(emb)
        maxpool = paddle.max(gru[0], axis=1)
        maxpool = paddle.reshape(maxpool, shape=[-1, self.title_encode_dim])
        nt_encodes.append(maxpool)
    nt_concat = paddle.concat(nt_encodes, axis=1)
    nt_hid = self.t_fc(nt_concat)
    cos_neg = F.cosine_similarity(q_hid, nt_hid, axis=1).reshape([-1, 1])

    return cos_pos, cos_neg
def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
               cls_out_channels, use_sigmoid_cls):
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)

    mlvl_bboxes = []
    mlvl_scores = []

    idx = 0
    for cls_score, bbox_pred, anchors in zip(cls_score_list, bbox_pred_list,
                                             mlvl_anchors):
        cls_score = paddle.reshape(cls_score, [-1, cls_out_channels])
        if use_sigmoid_cls:
            scores = F.sigmoid(cls_score)
        else:
            scores = F.softmax(cls_score, axis=-1)

        # bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
        bbox_pred = paddle.transpose(bbox_pred, [1, 2, 0])
        bbox_pred = paddle.reshape(bbox_pred, [-1, 5])
        anchors = paddle.reshape(anchors, [-1, 5])

        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Get maximum scores for foreground classes.
            if use_sigmoid_cls:
                max_scores = paddle.max(scores, axis=1)
            else:
                max_scores = paddle.max(scores[:, 1:], axis=1)

            topk_val, topk_inds = paddle.topk(max_scores, nms_pre)
            anchors = paddle.gather(anchors, topk_inds)
            bbox_pred = paddle.gather(bbox_pred, topk_inds)
            scores = paddle.gather(scores, topk_inds)

        target_means = (.0, .0, .0, .0, .0)
        target_stds = (1.0, 1.0, 1.0, 1.0, 1.0)
        bboxes = bbox_utils.delta2rbox(anchors, bbox_pred, target_means,
                                       target_stds)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        idx += 1

    mlvl_bboxes = paddle.concat(mlvl_bboxes, axis=0)
    mlvl_scores = paddle.concat(mlvl_scores)

    if use_sigmoid_cls:
        # Add a dummy background class to the front when using sigmoid
        padding = paddle.zeros(
            [mlvl_scores.shape[0], 1], dtype=mlvl_scores.dtype)
        mlvl_scores = paddle.concat([padding, mlvl_scores], axis=1)

    return mlvl_scores, mlvl_bboxes
def __call__(self, mask_out, bboxes, bbox_num, origin_shape):
    """
    Decode the mask_out and paste the mask to the origin image.

    Args:
        mask_out (Tensor): mask_head output with shape [N, 28, 28].
        bboxes (Tensor): The output bboxes with shape [N, 6] after decode
            and NMS, including labels, scores and bboxes.
        bbox_num (Tensor): The number of prediction boxes of each batch with
            shape [1], and is N.
        origin_shape (Tensor): The origin shape of the input image, the tensor
            shape is [N, 2], and each row is [h, w].
    Returns:
        pred_result (Tensor): The final prediction mask results with shape
            [N, h, w] in binary mask style.
    """
    num_mask = mask_out.shape[0]
    origin_shape = paddle.cast(origin_shape, 'int32')

    if self.export_onnx:
        h, w = origin_shape[0][0], origin_shape[0][1]
        mask_onnx = self.paste_mask(mask_out[:, None, :, :], bboxes[:, 2:],
                                    h, w)
        mask_onnx = mask_onnx >= self.binary_thresh
        pred_result = paddle.cast(mask_onnx, 'int32')
    else:
        max_h = paddle.max(origin_shape[:, 0])
        max_w = paddle.max(origin_shape[:, 1])
        pred_result = paddle.zeros(
            [num_mask, max_h, max_w], dtype='int32') - 1

        id_start = 0
        for i in range(paddle.shape(bbox_num)[0]):
            bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
            mask_out_i = mask_out[id_start:id_start + bbox_num[i], :, :]
            im_h = origin_shape[i, 0]
            im_w = origin_shape[i, 1]
            bbox_num_i = bbox_num[id_start]
            pred_mask = self.paste_mask(mask_out_i[:, None, :, :],
                                        bboxes_i[:, 2:], im_h, im_w)
            pred_mask = paddle.cast(pred_mask >= self.binary_thresh, 'int32')
            pred_result[id_start:id_start + bbox_num[i], :im_h, :
                        im_w] = pred_mask
            id_start += bbox_num[i]

    if self.assign_on_cpu:
        paddle.set_device('gpu')

    return pred_result
def forward(self, inputs):
    """
    Input:
        inputs: input points data, [B, 3, N]
    Return:
        x: points feature, [B, C', N]
    """
    x1 = self.pointnet_1(inputs)
    x2 = paddle.max(x1, axis=-1, keepdim=True)
    x2 = paddle.tile(x2, [1, 1, self.max_points])
    x1 = paddle.concat([x1, x2], axis=1)
    x2 = self.pointnet_2(x1)
    x2 = paddle.max(x2, axis=-1, keepdim=True)
    return x1, x2
def visual_in_traning(log_writer, vis_dict, step):
    """
    Visual in vdl

    Args:
        log_writer (LogWriter): The log writer of vdl.
        vis_dict (dict): Dict of tensor. The shape of the tensor is (C, H, W).
    """
    for key, value in vis_dict.items():
        value_shape = value.shape
        if value_shape[0] not in [1, 3]:
            value = value[0]
            value = value.unsqueeze(0)
        value = paddle.transpose(value, (1, 2, 0))
        min_v = paddle.min(value)
        max_v = paddle.max(value)
        if (min_v > 0) and (max_v < 1):
            value = value * 255
        elif (min_v < 0 and min_v >= -1) and (max_v <= 1):
            value = (1 + value) / 2 * 255
        else:
            value = (value - min_v) / (max_v - min_v) * 255

        value = value.astype('uint8')
        value = value.numpy()
        log_writer.add_image(tag=key, img=value, step=step)
def test_set_outsize_gpu(self):
    if paddle.fluid.core.is_compiled_with_cuda():
        x = paddle.to_tensor(
            np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]), dtype="float32")
        src_index = paddle.to_tensor(np.array([0, 0, 1]), dtype="int32")
        dst_index = paddle.to_tensor(np.array([0, 1, 1]), dtype="int32")
        res = paddle.incubate.graph_send_recv(x, src_index, dst_index, "sum")
        out_size = paddle.max(dst_index) + 1
        res_set_outsize = paddle.incubate.graph_send_recv(
            x, src_index, dst_index, "sum", out_size)

        np_res = np.array([[0, 2, 3], [1, 6, 8], [0, 0, 0]], dtype="float32")
        np_res_set_outsize = np.array([[0, 2, 3], [1, 6, 8]], dtype="float32")

        self.assertTrue(
            np.allclose(np_res, res, atol=1e-6), "two value is\
            {}\n{}, check diff!".format(np_res, res))
        self.assertTrue(
            np.allclose(np_res_set_outsize, res_set_outsize, atol=1e-6),
            "two value is\
            {}\n{}, check diff!".format(np_res_set_outsize, res_set_outsize))
def forward(self, input_ids):
    """
    Args:
        input_ids (Tensor): See :class:`GPTModel`.

    Returns:
        Tensor: Returns tensor `src_ids`, which means the indices of output
            sequence tokens in the vocabulary. They are numerical
            representations of tokens that build the output sequence.
    """
    output, cached_kvs = self.model(input_ids, use_cache=True, cache=None)
    src_ids = input_ids
    nid = paddle.argmax(output[:, -1, :], axis=-1).reshape([-1, 1])
    src_ids = paddle.concat([src_ids, nid], axis=1)
    cur_len = 0
    while (cur_len < self.max_predict_len):
        output, cached_kvs = self.model(
            nid, use_cache=True, cache=cached_kvs)
        nid = paddle.argmax(output[:, -1, :], axis=-1).reshape([-1, 1])
        src_ids = paddle.concat([src_ids, nid], axis=1)
        cur_len += 1
        if paddle.max(nid) == self.eol_token_id:
            break
    return src_ids
def forward(self, x):
    # x: input features with shape [b, c, h, w]
    b, c, h, w = x.shape

    # channel attention
    if self.c_state:
        y_avg = self.avg_pool(x)
        y_max = self.max_pool(x)
        y_c = self.c_attention(y_avg.squeeze(-1).transpose(
            (0, 2, 1))).transpose((0, 2, 1)).unsqueeze(-1) + \
            self.c_attention(y_max.squeeze(-1).transpose(
                (0, 2, 1))).transpose((0, 2, 1)).unsqueeze(-1)
        y_c = self.sigmoid(y_c)

    # spatial attention
    if self.s_state:
        x_s = self.conv_s(x)
        avg_out = paddle.mean(x_s, axis=1, keepdim=True)
        max_out = paddle.max(x_s, axis=1, keepdim=True)
        y_s = paddle.concat([avg_out, max_out], axis=1)
        y_s = self.sigmoid(self.s_attention(y_s))

    if self.c_state and self.s_state:
        y = x * y_s * y_c + x
    elif self.c_state:
        y = x * y_c + x
    elif self.s_state:
        y = x * y_s + x
    else:
        y = x
    return y
def forward(self, x):
    avg_out = paddle.mean(x, axis=1, keepdim=True)
    max_out = paddle.max(x, axis=1, keepdim=True)
    x = paddle.concat([avg_out, max_out], axis=1)
    x = self.conv(x)
    x = F.sigmoid(x)
    return x
def libra_label_box(anchors, gt_boxes, gt_classes, positive_overlap,
                    negative_overlap, num_classes):
    # TODO: use paddle API to speed up
    gt_classes = gt_classes.numpy()
    gt_overlaps = np.zeros((anchors.shape[0], num_classes))
    matches = np.zeros((anchors.shape[0]), dtype=np.int32)
    if len(gt_boxes) > 0:
        proposal_to_gt_overlaps = bbox_overlaps(anchors, gt_boxes).numpy()

        overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
        overlaps_max = proposal_to_gt_overlaps.max(axis=1)
        # Boxes with non-zero overlap with gt boxes
        overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
        overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
            overlapped_boxes_ind]]

        for idx in range(len(overlapped_boxes_ind)):
            gt_overlaps[overlapped_boxes_ind[idx],
                        overlapped_boxes_gt_classes[idx]] = overlaps_max[
                            overlapped_boxes_ind[idx]]
            matches[overlapped_boxes_ind[idx]] = overlaps_argmax[
                overlapped_boxes_ind[idx]]

    gt_overlaps = paddle.to_tensor(gt_overlaps)
    matches = paddle.to_tensor(matches)

    matched_vals = paddle.max(gt_overlaps, axis=1)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    match_labels = paddle.where(matched_vals < negative_overlap,
                                paddle.zeros_like(match_labels), match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels), match_labels)

    return matches, match_labels, matched_vals
def forward(self, inputs):
    inputs = paddle.to_tensor(inputs[0])
    batchsize = inputs.shape[0]

    t_net = self.input_transform_net(inputs)
    t_net = paddle.squeeze(t_net, axis=-1)
    t_net = self.input_fc(t_net)
    t_net = paddle.reshape(t_net, [batchsize, 3, 3])

    x = paddle.transpose(inputs, (0, 2, 1))
    x = paddle.matmul(x, t_net)
    x = paddle.transpose(x, (0, 2, 1))
    x = self.mlp_1(x)

    t_net = self.feature_transform_net(x)
    t_net = paddle.squeeze(t_net, axis=-1)
    t_net = self.feature_fc(t_net)
    t_net = paddle.reshape(t_net, [batchsize, 64, 64])

    x = paddle.squeeze(x, axis=-1)
    x = paddle.transpose(x, (0, 2, 1))
    x = paddle.matmul(x, t_net)
    x = paddle.transpose(x, (0, 2, 1))

    point_feat = x
    x = self.mlp_2(x)
    x = paddle.max(x, axis=-1, keepdim=True)
    global_feat_expand = paddle.tile(x, [1, 1, self.max_point])
    x = paddle.concat([point_feat, global_feat_expand], axis=1)
    x = self.seg_net(x)
    x = paddle.squeeze(x, axis=-1)
    x = paddle.transpose(x, (0, 2, 1))

    return x
def softmax_with_cross_entropy(self, shard_logit, shard_one_hot):
    shard_max = paddle.max(shard_logit, axis=1, keepdim=True)
    global_max = shard_max
    paddle.distributed.all_reduce(
        global_max, op=paddle.distributed.ReduceOp.MAX)
    shard_logit_new = paddle.subtract(shard_logit, global_max)

    shard_exp = paddle.exp(shard_logit_new)
    shard_demon = paddle.sum(shard_exp, axis=1, keepdim=True)
    global_demon = shard_demon
    paddle.distributed.all_reduce(
        global_demon, op=paddle.distributed.ReduceOp.SUM)

    global_log_demon = paddle.log(global_demon)
    shard_log_prob = shard_logit_new - global_log_demon
    shard_prob = paddle.exp(shard_log_prob)

    target_log_prob = paddle.min(
        shard_log_prob * shard_one_hot, axis=1, keepdim=True)
    shard_loss = paddle.scale(target_log_prob, scale=-1.0)
    # TODO: paddle.distributed.reducescatter not found
    global_loss = paddle.fluid.layers.collective._c_reducescatter(
        shard_loss, nranks=self.nranks, use_calc_stream=True)
    return global_loss, shard_prob
def forward(self, input, target=None):
    # normalization
    features = input["features"]
    features = self._nomalize(features)
    samples_each_class = self.samples_each_class
    rerange_index = paddle.to_tensor(self.rerange_index)

    # calc similarity matrix
    diffs = paddle.unsqueeze(
        features, axis=1) - paddle.unsqueeze(features, axis=0)
    similary_matrix = paddle.sum(paddle.square(diffs), axis=-1)

    # rerange
    tmp = paddle.reshape(similary_matrix, shape=[-1, 1])
    tmp = paddle.gather(tmp, index=rerange_index)
    similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size])

    # split
    ignore, pos, neg = paddle.split(
        similary_matrix,
        num_or_sections=[1, samples_each_class - 1, -1],
        axis=1)
    ignore.stop_gradient = True

    hard_pos = paddle.max(pos)
    hard_neg = paddle.min(neg)

    loss = hard_pos + self.margin - hard_neg
    loss = paddle.nn.ReLU()(loss)
    return {"msmloss": loss}
def train():
    global e_greed, update_num
    total_reward = 0
    # Reset the game state
    obs = env.reset()
    obs = preprocess(obs)

    while True:
        # Use an epsilon-greedy strategy to decide where the action comes from
        e_greed = max(0.01, e_greed - e_greed_decrement)
        if np.random.rand() < e_greed:
            # Randomly generate an action
            action = env.action_space()
        else:
            # Predict the action with the policy model
            obs1 = np.expand_dims(obs, axis=0)
            action = policyQ(paddle.to_tensor(obs1, dtype='float32'))
            action = paddle.argmax(action).numpy()[0]

        # Execute one game step
        next_obs, reward, done, info = env.step(action)
        next_obs = preprocess(next_obs)

        n_step_buffer.append((obs, action, reward, next_obs, done))
        if len(n_step_buffer) > n_step:
            n_step_reward = sum(
                [n_step_buffer[i][2] * (gamma**i) for i in range(n_step)])
            total_reward += n_step_reward
            n_step_obs, n_step_action, _, n_step_next_obs, n_step_done = \
                n_step_buffer.pop(0)
            rpm.append((n_step_obs, n_step_action, n_step_reward,
                        n_step_next_obs, n_step_done))

        obs = next_obs

        # Game over
        if done:
            while len(n_step_buffer) > 0:
                n_step_reward = sum([
                    n_step_buffer[i][2] * (gamma**i)
                    for i in range(len(n_step_buffer))
                ])
                n_step_obs, n_step_action, _, n_step_next_obs, n_step_done = \
                    n_step_buffer.pop(0)
                rpm.append((n_step_obs, n_step_action, n_step_reward,
                            n_step_next_obs, n_step_done))
            break

        # Start training once the recorded data exceeds batch_size
        if len(rpm) > batch_size:
            # Fetch a batch of training data
            batch_obs, batch_action, batch_reword, batch_next_obs, batch_done = \
                rpm.sample(batch_size)

            # Compute the loss
            action_value = policyQ(batch_obs)
            action_onehot = paddle.nn.functional.one_hot(
                batch_action, action_dim)
            pred_action_value = paddle.sum(
                action_value * action_onehot, axis=1)

            best_v = targetQ(batch_next_obs)
            best_v = paddle.max(best_v, axis=1)
            best_v.stop_gradient = True
            target = batch_reword + gamma**n_step * best_v * (1.0 - batch_done)
            cost = paddle.nn.functional.mse_loss(pred_action_value, target)

            # Gradient update
            cost.backward()
            optimizer.step()
            optimizer.clear_grad()

            # Update the target model's parameters every fixed number of
            # training steps
            if update_num % 200 == 0:
                targetQ.load_dict(policyQ.state_dict())
            update_num += 1
    return total_reward
def forward(self, xyz, points):
    """
    Input:
        xyz: input points position data, [B, C, N]
        points: input points data, [B, D, N]
    Return:
        new_xyz: sampled points position data, [B, C, S]
        new_points_concat: sample points feature data, [B, D', S]
    """
    xyz = xyz.transpose([0, 2, 1])
    if points is not None:
        points = points.transpose([0, 2, 1])

    # new_xyz: sampled points position data, [B, npoint, C]
    # new_points: sampled points data, [B, npoint, nsample, C+D]
    if self.group_all:
        new_xyz, new_points = sample_and_group_all(xyz, points)
    else:
        new_xyz, new_points = sample_and_group(self.npoint, self.radius,
                                               self.nsample, xyz, points)

    new_points = new_points.transpose([0, 3, 2, 1])
    for i, conv in enumerate(self.mlp_convs):
        bn = self.mlp_bns[i]
        new_points = F.relu(bn(conv(new_points)))

    new_points = paddle.max(new_points, 2)
    new_xyz = new_xyz.transpose([0, 2, 1])
    return new_xyz, new_points
def abs_max_run(self, reader, exe, step=None, loss_name=None):
    fetch_list = []
    with paddle.static.program_guard(self.program):
        for act_name in self.real_names:
            act = self.program.global_block().var(act_name)
            act = paddle.max(paddle.abs(act), name=act_name + "_reduced")
            fetch_list.append(act_name + "_reduced.tmp_0")

    if not hasattr(self.program, '_program'):
        # Compile the native program to speed up
        program = paddle.static.CompiledProgram(
            self.program).with_data_parallel(loss_name=loss_name)
    for idx, data in enumerate(reader):
        vars_np = exe.run(program=program, feed=data, fetch_list=fetch_list)
        vars_np = [np.max(var) for var in vars_np]
        mapped_vars_np = dict(zip(self.real_names, vars_np))
        values = self.update(mapped_vars_np)

        if idx % 10 == 0:
            _logger.info("Collecting..., Step: {}".format(idx))

        if step is not None and idx + 1 >= step:
            break
    return values
def forward_test(self, src):
    bs = paddle.shape(src)[0]
    if self.encoder is not None:
        src = self.positional_encoding(paddle.transpose(src, [1, 0, 2]))
        memory = self.encoder(src)
    else:
        memory = paddle.transpose(paddle.squeeze(src, 2), [2, 0, 1])
    dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64)
    dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32)
    for len_dec_seq in range(1, 25):
        dec_seq_embed = paddle.transpose(self.embedding(dec_seq), [1, 0, 2])
        dec_seq_embed = self.positional_encoding(dec_seq_embed)
        tgt_mask = self.generate_square_subsequent_mask(
            paddle.shape(dec_seq_embed)[0])
        output = self.decoder(
            dec_seq_embed,
            memory,
            tgt_mask=tgt_mask,
            memory_mask=None,
            tgt_key_padding_mask=None,
            memory_key_padding_mask=None)
        dec_output = paddle.transpose(output, [1, 0, 2])
        dec_output = dec_output[:, -1, :]
        word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1)
        preds_idx = paddle.argmax(word_prob, axis=1)
        if paddle.equal_all(
                preds_idx,
                paddle.full(paddle.shape(preds_idx), 3, dtype='int64')):
            break
        preds_prob = paddle.max(word_prob, axis=1)
        dec_seq = paddle.concat(
            [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1)
        dec_prob = paddle.concat(
            [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1)
    return [dec_seq, dec_prob]
def relative_position_bucket(relative_position,
                             bidirectional=True,
                             num_buckets=32,
                             max_distance=128):
    ret = 0
    if bidirectional:
        num_buckets //= 2
        ret += (relative_position > 0).astype(paddle.int64) * num_buckets
        n = paddle.abs(relative_position)
    else:
        # elementwise maximum against zero; paddle.maximum is the elementwise
        # op (paddle.max reduces along an axis)
        n = paddle.maximum(-relative_position,
                           paddle.zeros_like(relative_position))
    # now n is in the range [0, inf)

    # half of the buckets are for exact increments in positions
    max_exact = num_buckets // 2
    is_small = n < max_exact

    # The other half of the buckets are for logarithmically bigger bins in
    # positions up to max_distance
    val_if_large = max_exact + (paddle.log(
        n.astype(paddle.float32) / max_exact) /
        math.log(max_distance / max_exact) *
        (num_buckets - max_exact)).astype(paddle.int64)
    val_if_large = paddle.minimum(
        val_if_large, paddle.full_like(val_if_large, num_buckets - 1))

    ret += paddle.where(is_small, n, val_if_large)
    return ret
def forward(self, inputs):
    x = paddle.to_tensor(inputs)
    batchsize = x.shape[0]

    t_net = self.input_transform_net(x)
    t_net = paddle.squeeze(t_net, axis=-1)
    t_net = self.input_fc(t_net)
    t_net = paddle.reshape(t_net, [batchsize, 3, 3])

    x = paddle.transpose(x, (0, 2, 1))
    x = paddle.matmul(x, t_net)
    x = paddle.transpose(x, (0, 2, 1))
    x = self.mlp_1(x)

    t_net = self.feature_transform_net(x)
    t_net = paddle.squeeze(t_net, axis=-1)
    t_net = self.feature_fc(t_net)
    t_net = paddle.reshape(t_net, [batchsize, 64, 64])

    x = paddle.squeeze(x, axis=-1)
    x = paddle.transpose(x, (0, 2, 1))
    x = paddle.matmul(x, t_net)
    x = paddle.transpose(x, (0, 2, 1))
    x = self.mlp_2(x)

    x = paddle.max(x, axis=-1)
    x = paddle.squeeze(x, axis=-1)
    x = self.fc(x)
    return x
def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
    super(AddCoordsTh, self).__init__()
    self.with_r = with_r
    self.with_boundary = with_boundary

    with paddle.no_grad():
        x_coords = paddle.arange(height).unsqueeze(1).expand(
            (height, width)).astype('float32')
        y_coords = paddle.arange(width).unsqueeze(0).expand(
            (height, width)).astype('float32')
        x_coords = (x_coords / (height - 1)) * 2 - 1
        y_coords = (y_coords / (width - 1)) * 2 - 1
        coords = paddle.stack([x_coords, y_coords],
                              axis=0)  # (2, height, width)

        if self.with_r:
            rr = paddle.sqrt(
                paddle.pow(x_coords, 2) +
                paddle.pow(y_coords, 2))  # (height, width)
            rr = (rr / paddle.max(rr)).unsqueeze(0)
            coords = paddle.concat([coords, rr], axis=0)

        self.coords = coords.unsqueeze(0)  # (1, 2 or 3, height, width)
        self.x_coords = x_coords
        self.y_coords = y_coords
def entropy(self):
    """Shannon entropy in nats.

    Returns:
        Tensor: Shannon entropy of Categorical distribution. The data type is float32.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            cat = Categorical(x)

            cat.entropy()
            # [1.77528]

    """
    name = self.name + '_entropy'
    logits = self.logits - \
        paddle.max(self.logits, axis=-1, keepdim=True)
    e_logits = ops.exp(logits)
    z = paddle.sum(e_logits, axis=-1, keepdim=True)
    prob = e_logits / z

    neg_entropy = paddle.sum(prob * (logits - paddle.log(z)), axis=-1)
    entropy = paddle.scale(neg_entropy, scale=-1.0, name=name)
    return entropy
def forward(self, input, target):
    """
    Args:
        inputs: feature matrix with shape (batch_size, feat_dim)
        target: ground truth labels with shape (num_classes)
    """
    inputs = input["features"]

    if self.normalize_feature:
        inputs = 1. * inputs / (paddle.expand_as(
            paddle.norm(inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12)

    bs = inputs.shape[0]

    # compute distance
    dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = dist + dist.t()
    dist = paddle.addmm(
        input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()

    # hard negative mining
    is_pos = paddle.expand(target, (bs, bs)).equal(
        paddle.expand(target, (bs, bs)).t())
    is_neg = paddle.expand(target, (bs, bs)).not_equal(
        paddle.expand(target, (bs, bs)).t())

    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    '''
    dist_ap, relative_p_inds = paddle.max(
        paddle.reshape(dist[is_pos], (bs, -1)), axis=1, keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an, relative_n_inds = paddle.min(
        paddle.reshape(dist[is_neg], (bs, -1)), axis=1, keepdim=True)
    '''
    dist_ap = paddle.max(
        paddle.reshape(paddle.masked_select(dist, is_pos), (bs, -1)),
        axis=1,
        keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an = paddle.min(
        paddle.reshape(paddle.masked_select(dist, is_neg), (bs, -1)),
        axis=1,
        keepdim=True)
    # shape [N]
    dist_ap = paddle.squeeze(dist_ap, axis=1)
    dist_an = paddle.squeeze(dist_an, axis=1)

    # Compute ranking hinge loss
    y = paddle.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return {"TripletLossV2": loss}
def test_imperative_api(self):
    paddle.disable_static()
    np_x = np.array([10, 10]).astype('float64')
    x = paddle.to_tensor(np_x)
    z = paddle.max(x, axis=0)
    np_z = z.numpy()
    z_expected = np.array(np.max(np_x, axis=0))
    self.assertEqual((np_z == z_expected).all(), True)