def get_random_walk_noise_for_position_sequence(position_sequence,
                                                noise_std_last_step):
    """Returns random-walk noise in the velocity applied to the position."""
    velocity_sequence = learned_simulator.time_diff(position_sequence)
    # input_sequence[:, 1:] - input_sequence[:, :-1]

    # We want the noise scale in the velocity at the last step to be fixed.
    # Because we are going to compose noise at each step using a random_walk:
    # std_last_step**2 = num_velocities * std_each_step**2
    # so to keep `std_last_step` fixed, we apply at each step:
    # std_each_step = `std_last_step / np.sqrt(num_input_velocities)`
    # TODO(alvarosg): Make sure this is consistent with the value and
    # description provided in the paper.
    num_velocities = velocity_sequence.shape[1]
    # Scale the standard-normal samples so the per-step std is
    # noise_std_last_step / sqrt(num_velocities), as described above.
    velocity_sequence_noise = paddle.randn(
        shape=paddle.shape(velocity_sequence),
        dtype=position_sequence.dtype) * (noise_std_last_step /
                                          num_velocities**0.5)

    # Apply the random walk.
    # paddle.cumsum computes the cumulative sum of the tensor along the given axis.
    velocity_sequence_noise = paddle.cumsum(velocity_sequence_noise, axis=1)

    # Integrate the noise in the velocity to the positions, assuming
    # an Euler integrator and a dt = 1, and adding no noise to the very first
    # position (since that will only be used to calculate the first position
    # change).
    position_sequence_noise = paddle.concat([
        paddle.zeros_like(velocity_sequence_noise[:, 0:1]),
        paddle.cumsum(velocity_sequence_noise, axis=1)
    ], axis=1)

    return position_sequence_noise
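# A minimal standalone sketch (not from the source) of the variance argument in the comments
# above: composing i.i.d. per-step noise with a random walk (cumsum) makes the std at the
# last step grow like sqrt(num_steps), so scaling each step by
# noise_std_last_step / sqrt(num_steps) keeps the last-step std roughly fixed.
import paddle

num_steps, noise_std_last_step = 5, 0.03
step_noise = paddle.randn([10000, num_steps]) * (noise_std_last_step / num_steps**0.5)
walk = paddle.cumsum(step_noise, axis=1)
# Empirical std of the last step should be close to noise_std_last_step.
print(walk[:, -1].std().item())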
def lovasz_grad(gt_sorted):
    """
    Computes gradient of the Lovasz extension w.r.t sorted errors.
    See Alg. 1 in paper.
    """
    gts = paddle.sum(gt_sorted)
    p = len(gt_sorted)
    intersection = gts - paddle.cumsum(gt_sorted, axis=0)
    union = gts + paddle.cumsum(1 - gt_sorted, axis=0)
    jaccard = 1.0 - intersection.cast('float32') / union.cast('float32')

    if p > 1:  # cover 1-pixel case
        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
    return jaccard
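# A small usage sketch (not from the source), assuming the lovasz_grad above: it is fed the
# binary ground-truth labels sorted by descending prediction error, and the returned vector
# holds the Jaccard-index deltas used to weight the sorted errors in the Lovasz hinge.
import paddle

gt_sorted = paddle.to_tensor([1, 1, 0, 1, 0], dtype='float32')
grad = lovasz_grad(gt_sorted)
print(grad.numpy())  # per-position Jaccard deltas, non-negative for binary labels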
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    if self.num_partitions == 1:
        input_embeddings = self.word_embeddings(input_ids)
    else:
        input_embeddings = paddle.distributed.split(
            input_ids,
            size=(self.vocab_size, self.hidden_size),
            operation='embedding',
            weight_attr=self.weight_attr,
            num_partitions=fleet.worker_num())
    # paddle.static.Print(input_embeddings, summarize=-1)
    position_embeddings = self.position_embeddings(position_ids)
    # paddle.static.Print(position_embeddings, summarize=-1)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    # paddle.static.Print(token_type_embeddings, summarize=-1)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    # paddle.static.Print(embeddings, summarize=-1)
    embeddings = self.layer_norm(embeddings)
    # paddle.static.Print(embeddings, summarize=-1)
    embeddings = self.dropout(embeddings)
    # paddle.static.Print(embeddings)
    return embeddings
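# A standalone sketch (not from the source) of the position-id trick used above and in
# several later snippets: cumsum over a tensor of ones gives [1, 2, ..., L] per row, and
# subtracting the ones yields 0-based positions without needing the static sequence length.
import paddle

input_ids = paddle.to_tensor([[101, 7, 8, 102], [101, 9, 102, 0]])
ones = paddle.ones_like(input_ids, dtype="int64")
position_ids = paddle.cumsum(ones, axis=1) - ones
print(position_ids.numpy())  # [[0 1 2 3], [0 1 2 3]]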
def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocabulary size)
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    top_k = min(top_k, logits.shape[-1])  # Safety check
    logits_np = logits.numpy()
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits_np < np.sort(logits_np)[-top_k]
        logits_np[indices_to_remove] = filter_value

    if top_p < 1.0:
        sorted_logits = paddle.sort(logits, descending=True)
        sorted_indices = paddle.argsort(logits, descending=True).numpy()
        cumulative_probs = paddle.cumsum(
            paddle.nn.functional.softmax(sorted_logits, axis=-1), axis=-1).numpy()

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1]
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits_np[indices_to_remove] = filter_value

    return paddle.to_tensor(logits_np)
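# A quick usage sketch (not from the source), assuming the top_k_top_p_filtering above and
# 1-D logits as in its docstring: entries outside the top-k / nucleus set are pushed to -inf,
# so a subsequent softmax assigns them zero probability and sampling never selects them.
import numpy as np
import paddle

logits = paddle.to_tensor([2.0, 1.0, 0.5, 0.1, -1.0])
filtered = top_k_top_p_filtering(logits, top_k=3, top_p=0.9)
probs = paddle.nn.functional.softmax(filtered, axis=-1)
print(probs.numpy())  # masked positions get probability 0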
def test_sample_result_fisher_yates_sampling(self):
    paddle.disable_static()
    if fluid.core.is_compiled_with_cuda():
        row = paddle.to_tensor(self.row)
        colptr = paddle.to_tensor(self.colptr)
        nodes = paddle.to_tensor(self.nodes)
        perm_buffer = paddle.to_tensor(self.edges_id)

        out_neighbors, out_count = paddle.incubate.graph_sample_neighbors(
            row,
            colptr,
            nodes,
            perm_buffer=perm_buffer,
            sample_size=self.sample_size,
            flag_perm_buffer=True)
        out_count_cumsum = paddle.cumsum(out_count)
        for i in range(len(out_count)):
            if i == 0:
                neighbors = out_neighbors[0:out_count_cumsum[i]]
            else:
                neighbors = out_neighbors[out_count_cumsum[i - 1]:
                                          out_count_cumsum[i]]
            # Ensure the correct sample size.
            self.assertTrue(
                out_count[i] == self.sample_size
                or out_count[i] == len(self.dst_src_dict[self.nodes[i]]))
            # Ensure no repetitive sample neighbors.
            self.assertTrue(
                neighbors.shape[0] == paddle.unique(neighbors).shape[0])
            # Ensure the correct sample neighbors.
            in_neighbors = np.isin(neighbors.numpy(),
                                   self.dst_src_dict[self.nodes[i]])
            self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0])
def test_sample_result(self):
    paddle.disable_static()
    row = paddle.to_tensor(self.row)
    colptr = paddle.to_tensor(self.colptr)
    nodes = paddle.to_tensor(self.nodes)

    out_neighbors, out_count = paddle.incubate.graph_sample_neighbors(
        row, colptr, nodes, sample_size=self.sample_size)
    out_count_cumsum = paddle.cumsum(out_count)
    for i in range(len(out_count)):
        if i == 0:
            neighbors = out_neighbors[0:out_count_cumsum[i]]
        else:
            neighbors = out_neighbors[out_count_cumsum[i - 1]:
                                      out_count_cumsum[i]]
        # Ensure the correct sample size.
        self.assertTrue(
            out_count[i] == self.sample_size
            or out_count[i] == len(self.dst_src_dict[self.nodes[i]]))
        # Ensure no repetitive sample neighbors.
        self.assertTrue(
            neighbors.shape[0] == paddle.unique(neighbors).shape[0])
        # Ensure the correct sample neighbors.
        in_neighbors = np.isin(neighbors.numpy(),
                               self.dst_src_dict[self.nodes[i]])
        self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0])
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            task_type_ids=None):
    if position_ids is None:
        # may need to use the shape op to unify static graph and dynamic graph
        # seq_length = input_ids.shape[1]
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    if self.use_task_id:
        if task_type_ids is None:
            task_type_ids = paddle.ones_like(input_ids,
                                             dtype="int64") * self.task_id
        task_type_embeddings = self.task_type_embeddings(task_type_ids)
        embeddings = embeddings + task_type_embeddings
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def cumsum(name: str, x, axis, dtype=None):
    paddle.enable_static()

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        # Declare the static input with the same shape and dtype as the numpy input.
        data = paddle.static.data(name='x', shape=x.shape, dtype=x.dtype)
        out = paddle.cumsum(data, axis, dtype=dtype)
        out = paddle.cast(out, np.float32)

        cpu = paddle.static.cpu_places(1)
        exe = paddle.static.Executor(cpu[0])
        # startup program will call initializer to initialize the parameters.
        exe.run(paddle.static.default_startup_program())

        outs = exe.run(feed={'x': x}, fetch_list=[out])

        saveModel(name,
                  exe,
                  feedkeys=['x'],
                  fetchlist=[out],
                  inputs=[x],
                  outputs=[outs[0]],
                  target_dir=sys.argv[1])

    return outs[0]
def TopPProcess(probs, top_p, min_tokens_to_keep):
    sorted_probs = paddle.sort(probs, descending=True)
    sorted_indices = paddle.argsort(probs, descending=True)
    cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)

    # Remove tokens with cumulative probs above the top_p, but keep at
    # least min_tokens_to_keep tokens
    sorted_indices_to_remove = cumulative_probs > top_p
    if min_tokens_to_keep > 1:
        # Set 'min_tokens_to_keep - 1' because the first token is kept
        sorted_indices_to_remove[:, :min_tokens_to_keep - 1] = 0
    # Keep the first token
    sorted_indices_to_remove = paddle.cast(sorted_indices_to_remove,
                                           dtype='int64')
    sorted_indices_to_remove[:, 1:] = (
        sorted_indices_to_remove[:, :-1].clone())
    sorted_indices_to_remove[:, 0] = 0

    # Scatter sorted tensors to original indexing
    sorted_indices = sorted_indices + paddle.arange(
        probs.shape[0]).unsqueeze(-1) * probs.shape[-1]
    condition = paddle.scatter(sorted_indices_to_remove.flatten(),
                               sorted_indices.flatten(),
                               sorted_indices_to_remove.flatten())
    condition = paddle.cast(condition, 'bool').reshape(probs.shape)
    probs = paddle.where(condition, paddle.full_like(probs, 0.0), probs)
    return probs
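# A brief usage sketch (not from the source), assuming the TopPProcess above: probabilities
# of tokens outside the smallest set whose cumulative mass exceeds top_p are zeroed, so the
# remaining ones can be renormalized or sampled from directly.
import paddle

probs = paddle.to_tensor([[0.5, 0.3, 0.15, 0.05]])
filtered = TopPProcess(probs, top_p=0.9, min_tokens_to_keep=1)
print(filtered.numpy())  # [[0.5, 0.3, 0.15, 0.]]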
def get_index_from_counts(counts):
    """Return index generated from counts

    This function returns the index from the given counts.
    For example, when counts = [2, 3, 4], return [0, 2, 5, 9].

    Args:
        counts: numpy.ndarray or paddle.Tensor

    Return:
        Return the index of the counts.
    """
    if check_is_tensor(counts):
        index = paddle.concat(
            [
                paddle.zeros(shape=[1, ], dtype="int64"),
                paddle.cumsum(counts)
            ],
            axis=-1)
    else:
        index = np.cumsum(counts, dtype="int64")
        index = np.insert(index, 0, 0)
    return index
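# A tiny sketch (not from the source) matching the docstring above, using the numpy branch:
# counts of [2, 3, 4] elements per group become exclusive prefix offsets [0, 2, 5, 9],
# i.e. group i occupies positions index[i]:index[i + 1].
import numpy as np

counts = np.array([2, 3, 4])
index = np.insert(np.cumsum(counts, dtype="int64"), 0, 0)
print(index)  # [0 2 5 9]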
def forward(self, input_ids, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones

    input_embeddings = self.word_embeddings(input_ids)

    if _global_parallel_strategy == "mp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mapping": [0, -1]
                          })
    elif _global_parallel_strategy == "dp_mp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mapping": [1, -1]
                          })
    elif _global_parallel_strategy == "mp_pp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": MPPP_MESH_LIST[0],
                              "dims_mapping": [0, -1]
                          })
    elif _global_parallel_strategy == "dp_mp_pp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": DPMPPP_MESH_LIST[0],
                              "dims_mapping": [1, -1]
                          })

    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    metric_dict = dict()

    # get cmc
    choosen_indices = paddle.argsort(similarities_matrix,
                                     axis=1,
                                     descending=True)
    gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
    gallery_labels_transpose = paddle.broadcast_to(
        gallery_labels_transpose,
        shape=[
            choosen_indices.shape[0], gallery_labels_transpose.shape[1]
        ])
    choosen_label = paddle.index_sample(gallery_labels_transpose,
                                        choosen_indices)
    equal_flag = paddle.equal(choosen_label, query_img_id)
    if keep_mask is not None:
        keep_mask = paddle.index_sample(keep_mask.astype('float32'),
                                        choosen_indices)
        equal_flag = paddle.logical_and(equal_flag, keep_mask.astype('bool'))
    equal_flag = paddle.cast(equal_flag, 'float32')

    Ns = paddle.arange(gallery_img_id.shape[0]) + 1
    equal_flag_cumsum = paddle.cumsum(equal_flag, axis=1)
    Precision_at_k = (paddle.mean(equal_flag_cumsum, axis=0) / Ns).numpy()

    for k in self.topk:
        metric_dict["precision@{}".format(k)] = Precision_at_k[k - 1]

    return metric_dict
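# A compact sketch (not from the source) of the cumsum step above: for one query, each
# ranked gallery item has a 0/1 match flag, the cumulative sum counts hits within the top k,
# and dividing by k gives precision@k.
import numpy as np

equal_flag = np.array([1., 0., 1., 1., 0.])          # matches at ranks 1, 3, 4
precision_at_k = np.cumsum(equal_flag) / (np.arange(5) + 1)
print(precision_at_k)  # [1.  0.5  0.667  0.75  0.6]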
def get_pooled_embedding(self,
                         input_ids,
                         token_type_ids=None,
                         position_ids=None):
    src_mask = input_ids == self.bos_id
    src_mask = paddle.cast(src_mask, "float32")
    # [bs, 1, 1, max_len]
    src_mask = paddle.unsqueeze(src_mask, axis=[1, 2])
    src_mask.stop_gradient = True

    ones = paddle.ones_like(input_ids, dtype="int64")
    seq_length = paddle.cumsum(ones, axis=1)
    position_ids = seq_length - ones
    position_ids.stop_gradient = True

    embedding_output = self.ptm.embeddings(input_ids=input_ids,
                                           position_ids=position_ids,
                                           token_type_ids=token_type_ids)

    if self.use_fp16:
        embedding_output = paddle.cast(embedding_output, 'float16')

    sequence_output = self.ptm.encoder(embedding_output, src_mask)

    if self.use_fp16:
        sequence_output = paddle.cast(sequence_output, 'float32')

    cls_embedding = self.ptm.pooler(sequence_output)

    if self.output_emb_size > 0:
        cls_embedding = self.emb_reduce_linear(cls_embedding)

    cls_embedding = self.dropout(cls_embedding)
    cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)
    return cls_embedding
def ernie_send(self, src_feat, dst_feat, edge_feat):
    """Apply ernie model on the edge.

    Args:
        src_feat (Tensor Dict): src feature tensor dict.
        dst_feat (Tensor Dict): dst feature tensor dict.
        edge_feat (Tensor Dict): edge feature tensor dict.

    Returns:
        Tensor Dict: tensor dict which uses 'msg' as the key.
    """
    # input_ids
    cls = paddle.full(shape=[src_feat["term_ids"].shape[0], 1],
                      dtype="int64",
                      fill_value=self.cls_token_id)
    src_ids = paddle.concat([cls, src_feat["term_ids"]], 1)
    dst_ids = dst_feat["term_ids"]

    # sent_ids
    sent_ids = paddle.concat(
        [paddle.zeros_like(src_ids), paddle.ones_like(dst_ids)], 1)
    term_ids = paddle.concat([src_ids, dst_ids], 1)

    # build position_ids
    input_mask = paddle.cast(term_ids > 0, "int64")
    position_ids = paddle.cumsum(input_mask, axis=1) - 1

    outputs = self.ernie(term_ids, sent_ids, position_ids)
    feature = outputs[1]
    return {"msg": feature}
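# A small sketch (not from the source) of the position-id construction above: cumsum over
# the non-padding mask assigns consecutive positions to real tokens, while padded slots
# (id 0) repeat the previous value and never advance the position counter.
import paddle

term_ids = paddle.to_tensor([[1, 5, 7, 0, 0]])
input_mask = paddle.cast(term_ids > 0, "int64")
position_ids = paddle.cumsum(input_mask, axis=1) - 1
print(position_ids.numpy())  # [[0 1 2 2 2]]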
def forward(self, inputs):
    """ forward """
    x = paddle.cumsum(inputs,
                      axis=self.config["axis"],
                      dtype=self.config["dtype"])
    return x
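# A minimal sketch (not from the source) of what the wrapper above delegates to:
# paddle.cumsum accumulates along the chosen axis and can cast the result dtype, and with
# axis=None it flattens the input first, matching np.cumsum.
import numpy as np
import paddle

x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]])
print(paddle.cumsum(x, axis=1).numpy())                    # [[1 3 6], [4 9 15]]
print(paddle.cumsum(x, axis=0, dtype='float64').numpy())   # [[1. 2. 3.], [5. 7. 9.]]
print(np.array_equal(paddle.cumsum(x).numpy(), np.cumsum(x.numpy())))  # True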
def generate_segment_id(index):
    zeros = paddle.zeros(index[-1] + 1, dtype="int32")
    index = index[:-1]
    segments = paddle.scatter(zeros,
                              index,
                              paddle.ones_like(index, dtype="int32"),
                              overwrite=False)
    segments = paddle.cumsum(segments)[:-1] - 1
    return segments
def _get_batch_seq_index(self, batch_size, length):
    if self._batch_seq_index is None \
            or length + 2 > self._batch_seq_index.shape[1] \
            or batch_size > self._batch_seq_index.shape[0]:
        self._batch_seq_index = paddle.cumsum(
            paddle.ones([batch_size, length + 2], "int64"), axis=1) - 1
    if self.with_start_stop_tag:
        return self._batch_seq_index[:batch_size, :length + 2]
    else:
        return self._batch_seq_index[:batch_size, :length]
def forward(self, input_ids, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings
    embeddings = self.dropout(embeddings)
    return embeddings
def segment_padding(data, segment_ids):
    """
    Segment padding operator.

    This operator pads the input elements that share the same index in
    `segment_ids` to a common length, and reshapes them into
    [uniq_segment_id, max_padding, dim].

    Args:
        data (tensor): a tensor, available data type float32, float64.
        segment_ids (tensor): a 1-d tensor, which have the same size with the first dimension of input data.
                              available data type is int32, int64.

    Returns:
        output (Tensor): the padding result with shape [uniq_segment_id, max_padding, dim].
        seq_len (Tensor): the numbers of elements grouped by the same segment_ids.
        index (Tensor): the [segment_id, offset] index of each input element inside the padded output.

    Examples:

        .. code-block:: python

            import paddle
            import pgl

            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int64')
            output, seq_len, index = pgl.math.segment_padding(data, segment_ids)
            # output: [[[1., 2., 3.], [3., 2., 1.]],
            #          [[4., 5., 6.], [0., 0., 0.]]]
            # seq_len: [2, 1]
    """
    idx_a = segment_ids
    idx_b = paddle.arange(segment_ids.shape[0])

    temp_idx = paddle.ones([segment_ids.shape[0]], dtype='float32')
    temp_idx = segment_sum(temp_idx, segment_ids).astype('int32')
    seq_len = temp_idx
    max_padding = temp_idx.max().numpy()[0]

    temp_idx = paddle.cumsum(temp_idx)
    temp_idx_i = paddle.zeros([temp_idx.shape[0] + 1], dtype='int32')
    temp_idx_i[1:] = temp_idx
    temp_idx = temp_idx_i[:-1]
    temp_idx = paddle.gather(temp_idx, segment_ids)

    idx_b = idx_b - temp_idx
    index = paddle.stack([idx_a, idx_b], axis=1)

    bz = segment_ids.max().numpy()[0] + 1
    shape = [bz, max_padding, data.shape[-1]]
    output = paddle.scatter_nd(index, data, shape)
    return output, seq_len, index
def flat_words(words, pad_index=0):
    mask = words != pad_index
    lens = paddle.sum(paddle.cast(mask, "int64"), axis=-1)
    position = paddle.cumsum(
        lens + paddle.cast((lens == 0), "int64"), axis=1) - 1
    select = paddle.nonzero(mask)
    words = paddle.gather_nd(words, select)
    lens = paddle.sum(lens, axis=-1)
    words = pad_sequence_paddle(words, lens, pad_index)
    max_len = words.shape[1]
    position = mask_fill(position, position >= max_len, max_len - 1)
    return words, position
def run_static(self, use_npu=False):
    with fluid.program_guard(fluid.Program()):
        data_np = np.random.random((100, 100)).astype(np.float32)
        x = paddle.static.data('X', [100, 100])
        y = paddle.cumsum(x)
        y2 = paddle.cumsum(x, axis=0)
        y3 = paddle.cumsum(x, axis=-1)
        y4 = paddle.cumsum(x, dtype='float32')
        y5 = paddle.cumsum(x, dtype=np.int32)
        y6 = paddle.cumsum(x, axis=-2)

        place = fluid.NPUPlace(0) if use_npu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        out = exe.run(feed={'X': data_np},
                      fetch_list=[
                          y.name, y2.name, y3.name, y4.name, y5.name, y6.name
                      ])

        z = np.cumsum(data_np)
        self.assertTrue(np.allclose(z, out[0]))
        z = np.cumsum(data_np, axis=0)
        self.assertTrue(np.allclose(z, out[1]))
        z = np.cumsum(data_np, axis=-1)
        self.assertTrue(np.allclose(z, out[2]))
        self.assertTrue(out[3].dtype == np.float32)
        self.assertTrue(out[4].dtype == np.int32)
        z = np.cumsum(data_np, axis=-2)
        self.assertTrue(np.allclose(z, out[5]))
def run_cases(self):
    data_np = np.arange(12).reshape(3, 4)
    data = paddle.to_tensor(data_np)

    y = paddle.cumsum(data)
    z = np.cumsum(data_np)
    self.assertTrue(np.array_equal(z, y.numpy()))

    y = paddle.cumsum(data, axis=0)
    z = np.cumsum(data_np, axis=0)
    self.assertTrue(np.array_equal(z, y.numpy()))

    y = paddle.cumsum(data, axis=-1)
    z = np.cumsum(data_np, axis=-1)
    self.assertTrue(np.array_equal(z, y.numpy()))

    y = paddle.cumsum(data, dtype='float32')
    self.assertTrue(y.dtype == core.VarDesc.VarType.FP32)

    y = paddle.cumsum(data, dtype=np.int32)
    self.assertTrue(y.dtype == core.VarDesc.VarType.INT32)

    y = paddle.cumsum(data, axis=-2)
    z = np.cumsum(data_np, axis=-2)
    self.assertTrue(np.array_equal(z, y.numpy()))
def forward(self, input_ids, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=1)
        position_ids = seq_length - ones
        position_ids += 2
        position_ids.stop_gradient = True
    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self,
            input_ids,
            bbox=None,
            token_type_ids=None,
            position_ids=None):
    # input_shape = input_ids.size()
    # seq_length = input_shape[1]
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    word_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)

    # gry add
    try:
        left_position_embeddings = self.x_position_embeddings(bbox[:, :, 0])
        upper_position_embeddings = self.y_position_embeddings(bbox[:, :, 1])
        right_position_embeddings = self.x_position_embeddings(bbox[:, :, 2])
        lower_position_embeddings = self.y_position_embeddings(bbox[:, :, 3])
    except IndexError as e:
        raise IndexError(
            "The :obj:`bbox` coordinate values should be within 0-1000 range."
        ) from e
    h_position_embeddings = self.h_position_embeddings(bbox[:, :, 3] -
                                                       bbox[:, :, 1])
    w_position_embeddings = self.w_position_embeddings(bbox[:, :, 2] -
                                                       bbox[:, :, 0])
    # end of gry add

    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = (word_embeddings + position_embeddings +
                  left_position_embeddings + upper_position_embeddings +
                  right_position_embeddings + lower_position_embeddings +
                  h_position_embeddings + w_position_embeddings +
                  token_type_embeddings)
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, input_ids, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings
    with get_rng_state_tracker().rng_state('global_seed'):
        embeddings = self.dropout(embeddings)
    return embeddings
def generate_segment_id_from_index(index):
    if check_is_tensor(index):
        zeros = paddle.zeros(index[-1] + 1, dtype="int32")
        index = index[:-1]
        segments = scatter(zeros, index,
                           paddle.ones_like(index, dtype="int32"))
        segments = paddle.cumsum(segments)[:-1] - 1
        return segments
    else:
        segments = np.zeros(index[-1] + 1, dtype="int32")
        index = index[:-1]
        segments[index] += 1
        segments = np.cumsum(segments)[:-1] - 1
        return segments
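# A short sketch (not from the source) of the numpy branch above: an exclusive-prefix index
# such as [0, 2, 5, 9] (counts [2, 3, 4]) is turned into per-element segment ids by marking
# each group start with a 1 and running cumsum over the markers.
import numpy as np

index = np.array([0, 2, 5, 9])
segments = np.zeros(index[-1] + 1, dtype="int32")
segments[index[:-1]] += 1
segments = np.cumsum(segments)[:-1] - 1
print(segments)  # [0 0 1 1 1 2 2 2 2]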
def count_by_gate(gate, num_expert, world_size, require_pos=True, group=None):
    total_expert_count = num_expert * world_size
    with paddle.no_grad():
        local_expert_count = _number_count(gate, total_expert_count)

        if world_size > 1:
            global_expert_count = _alltoall(local_expert_count, group=group)
        else:
            global_expert_count = local_expert_count
        if not require_pos:
            pos = None
        else:
            lec_cum = paddle.cumsum(local_expert_count, axis=0)
            pos = _assign_pos(gate, lec_cum)
    return pos, local_expert_count, global_expert_count
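# A conceptual numpy sketch (not from the source; _number_count and _assign_pos are internal
# MoE ops): the cumsum of per-expert token counts gives each expert's end offset in a buffer
# grouped by expert, which is the information the position assignment uses to place tokens.
import numpy as np

gate = np.array([1, 0, 2, 0, 1])                       # expert chosen per token
local_expert_count = np.bincount(gate, minlength=3)    # [2, 2, 1]
lec_cum = np.cumsum(local_expert_count)                # [2, 4, 5]; expert i owns slots [lec_cum[i-1], lec_cum[i])
pos = np.argsort(gate, kind="stable")                  # token ids grouped by expert: [1, 3, 0, 4, 2]
print(local_expert_count, lec_cum, pos)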
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    if position_ids is None:
        # may need to use the shape op to unify static graph and dynamic graph
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        if self.cls_token_id == 0 or input_ids[0][0] == 0:
            # position_ids for RobertaBPETokenizer
            position_ids = seq_length + self.padding_idx + 1 - ones
        else:
            # position_ids for RobertaTokenizer
            position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    metric_dict = dict()

    choosen_indices = paddle.argsort(similarities_matrix,
                                     axis=1,
                                     descending=True)
    gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
    gallery_labels_transpose = paddle.broadcast_to(
        gallery_labels_transpose,
        shape=[
            choosen_indices.shape[0], gallery_labels_transpose.shape[1]
        ])
    choosen_label = paddle.index_sample(gallery_labels_transpose,
                                        choosen_indices)
    equal_flag = paddle.equal(choosen_label, query_img_id)
    if keep_mask is not None:
        keep_mask = paddle.index_sample(keep_mask.astype('float32'),
                                        choosen_indices)
        equal_flag = paddle.logical_and(equal_flag, keep_mask.astype('bool'))
    equal_flag = paddle.cast(equal_flag, 'float32')

    num_rel = paddle.sum(equal_flag, axis=1)
    num_rel = paddle.greater_than(num_rel, paddle.to_tensor(0.))
    num_rel_index = paddle.nonzero(num_rel.astype("int"))
    num_rel_index = paddle.reshape(num_rel_index, [num_rel_index.shape[0]])
    equal_flag = paddle.index_select(equal_flag, num_rel_index, axis=0)

    acc_sum = paddle.cumsum(equal_flag, axis=1)
    div = paddle.arange(acc_sum.shape[1]).astype("float32") + 1
    precision = paddle.divide(acc_sum, div)

    # calc map
    precision_mask = paddle.multiply(equal_flag, precision)
    ap = paddle.sum(precision_mask, axis=1) / paddle.sum(equal_flag, axis=1)
    metric_dict["mAP"] = paddle.mean(ap).numpy()[0]
    return metric_dict