def rand_translation(x, ratio=0.125):
    """Shift every sample of a batch by its own random integer offset.

    Args:
        x: Batch tensor; ``x.shape[0]`` is used as the batch size and
            ``x.shape[2]`` / ``x.shape[3]`` as the two spatial sizes.
        ratio: Maximum shift along each spatial axis, as a fraction of that
            axis' size.

    Returns:
        A tensor with the same layout as ``x``. Positions that would read
        from outside the image read from the one-pixel border added by
        ``F.pad`` instead.
    """
    batch, size_x, size_y = x.shape[0], x.shape[2], x.shape[3]
    max_shift_x = int(size_x * ratio + 0.5)
    max_shift_y = int(size_y * ratio + 0.5)

    # One offset per sample, broadcast over both spatial axes.
    translation_x = paddle.randint(
        -max_shift_x, max_shift_x + 1, shape=[batch, 1, 1])
    translation_y = paddle.randint(
        -max_shift_y, max_shift_y + 1, shape=[batch, 1, 1])

    grid_batch, grid_x, grid_y = paddle.meshgrid(
        paddle.arange(batch, dtype='int64'),
        paddle.arange(size_x, dtype='int64'),
        paddle.arange(size_y, dtype='int64'),
    )

    # "+ 1" moves into the coordinate system of the padded image; clipping to
    # [0, size + 1] sends every out-of-range lookup to the padded border.
    shifted_x = (grid_x + translation_x + 1).astype(x.dtype)
    shifted_y = (grid_y + translation_y + 1).astype(x.dtype)
    grid_x = paddle.clip(shifted_x, 0, size_x + 1).astype('int64')
    grid_y = paddle.clip(shifted_y, 0, size_y + 1).astype('int64')

    x_pad = F.pad(x, [1, 1, 1, 1])

    # TODO: once paddle supports int64 tensor indices this can become
    # x_pad.transpose([0, 2, 3, 1])[grid_batch, grid_x, grid_y].
    indices = paddle.stack([grid_batch, grid_x, grid_y], -1)
    channels_last = x_pad.transpose([0, 2, 3, 1])
    gathered = channels_last.gather_nd(indices)
    return gathered.transpose([0, 3, 1, 2])
def run_test_case(self):
    """Check that seeded GPU ``paddle.randint`` reproduces recorded values.

    After ``paddle.seed(100)`` an int32 tensor and then an int64 tensor are
    drawn. For each one the mean, the standard deviation and three
    five-element slices are compared with recorded reference values.

    The mean/std checks previously used ``assertTrue(value, expected)``,
    which treats ``expected`` as the failure message and therefore only
    verified that the statistic was non-zero. They now compare the values.
    """
    paddle.set_device('gpu')
    paddle.seed(100)

    shape = [32, 3, 1024, 1024]
    # (batch, channel, row, first column) of each five-element probe slice.
    probes = ((10, 0, 100, 100), (20, 1, 600, 600), (30, 2, 1000, 1000))
    # The order matters: the int32 draw must come before the int64 draw so
    # that both consume the random stream exactly as recorded.
    cases = (
        ('int32', -0.7517569760481516, 5773.696619107639, (
            [2535, 2109, 5916, -5011, -261],
            [3465, 7206, -8660, -9628, -6574],
            [881, 1560, 1100, 9664, 1669],
        )),
        ('int64', -1.461287518342336, 5773.023477548159, (
            [7213, -9597, 754, 8129, -1158],
            [-7159, 8054, 7675, 6980, 8506],
            [3581, 3420, -8027, -5237, -2436],
        )),
    )

    for dtype, expected_mean, expected_std, expected_slices in cases:
        x = paddle.randint(-10000, 10000, shape, dtype=dtype).numpy()
        np.testing.assert_allclose(x.mean(), expected_mean, rtol=1e-6)
        np.testing.assert_allclose(x.std(), expected_std, rtol=1e-6)
        for (n, ch, row, col), expect in zip(probes, expected_slices):
            self.assertTrue(
                np.array_equal(x[n, ch, row, col:col + 5], expect))
def forward(self, x):
    """Add a randomly sized run of loop counters to ``x``, then a random offset.

    Two one-element int32 tensors are drawn with ``paddle.randint`` (bounds
    0 and 5): the first is added to the result, the second is used as the
    loop bound.
    """
    # Draw order is kept: the offset first, the loop bound second.
    offset = paddle.randint(low=0, high=5, shape=[1], dtype='int32')
    steps = paddle.randint(low=0, high=5, shape=[1], dtype='int32')

    # Rebind rather than update in place so the caller's tensor is untouched.
    for i in range(0, steps):
        x = x + i

    return x + offset
def test_forward_elemwise():
    """Run ``verify_model`` on element-wise binary APIs over several shape pairs."""

    class ElemwiseAPI(nn.Layer):
        """Layer that forwards its two inputs to a paddle API chosen by name."""

        def __init__(self, api_name):
            super().__init__()
            self.api_name_ = api_name
            # Look the API up on ``paddle`` first and fall back to
            # ``paddle.nn.functional`` when that lookup yields nothing.
            self.func = getattr(paddle, api_name, None)
            if not self.func:
                self.func = getattr(paddle.nn.functional, api_name, None)

        @paddle.jit.to_static
        def forward(self, input1, input2):
            out = self.func(input1, input2)
            if "equal" in self.api_name_ or "than" in self.api_name_:
                # Comparison APIs return booleans; cast the result to int32.
                out = paddle.cast(out, "int32")
            return out

    api_list = [
        "equal",
    ]
    # (x_shape, y_shape) pairs, kept together so the pairing is explicit.
    shape_pairs = [
        ([128], [1]),
        ([8, 20], [8, 20]),
        ([4, 20, 3], [4, 1, 1]),
        ([2, 3, 8, 8], [2, 3, 8, 8]),
        ([2, 3, 3, 9, 9], [2, 3, 3, 9, 1]),
    ]
    for x_shape, y_shape in shape_pairs:
        x_data = paddle.randint(1, 1000, x_shape, dtype="int32")
        y_data = paddle.randint(1, 1000, y_shape, dtype="int32")
        for api_name in api_list:
            verify_model(ElemwiseAPI(api_name), [x_data, y_data])
def rand_cutout(x, ratio=0.5):
    """Zero out one randomly placed rectangle in every sample of a batch.

    Args:
        x: Batch tensor; ``x.shape[0]`` is used as the batch size and
            ``x.shape[2]`` / ``x.shape[3]`` as the two spatial sizes.
        ratio: Side length of the rectangle as a fraction of each spatial
            size.

    Returns:
        ``x`` multiplied by a per-sample 0/1 mask that is broadcast over
        axis 1. The mask is detached before the multiplication.
    """
    cutout_size = (int(x.shape[2] * ratio + 0.5),
                   int(x.shape[3] * ratio + 0.5))
    offset_x = paddle.randint(
        0, x.shape[2] + (1 - cutout_size[0] % 2), shape=[x.shape[0], 1, 1])
    offset_y = paddle.randint(
        0, x.shape[3] + (1 - cutout_size[1] % 2), shape=[x.shape[0], 1, 1])

    # TODO: once paddle supports int64 tensor indices, the mask can be built
    # directly, by zeroing
    #     mask[batch, clip(arange(size) + offset - size // 2, 0, dim - 1)]
    # Until then the same region is expressed as a comparison on a full grid.
    _, grid_x, grid_y = paddle.meshgrid(
        paddle.arange(x.shape[0], dtype='int64'),
        paddle.arange(x.shape[2], dtype='int64'),
        paddle.arange(x.shape[3], dtype='int64'),
    )

    # The rectangle starts at ``offset - size // 2``, so a pixel's coordinate
    # relative to the rectangle is ``pixel - offset + size // 2``. The earlier
    # ``pixel + offset - size // 2`` mirrored the rectangle around the origin,
    # which pushed it partly or entirely outside the image for most offsets.
    rel_x = grid_x - offset_x + cutout_size[0] // 2
    rel_y = grid_y - offset_y + cutout_size[1] // 2

    inside = ((rel_x >= 0).astype(x.dtype) *
              (rel_x < cutout_size[0]).astype(x.dtype) *
              (rel_y >= 0).astype(x.dtype) *
              (rel_y < cutout_size[1]).astype(x.dtype)).astype(x.dtype)
    mask = 1 - inside

    x = x * mask.unsqueeze(1).detach()
    return x
def test_generator_ranint_static(self):
    """Re-seeding the generator must reproduce static-graph randint output.

    The program holding two ``randint`` ops is run once, the generator is
    re-seeded with the original seed, and the program is run again. On
    builds without CUDA the two runs are compared op by op, and the two ops
    of the first run are required to differ from each other.
    """
    seed = 123123143
    fluid.disable_dygraph()
    gen = paddle.seed(seed)

    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # The shape attribute is a plain list without tensor Variables.
        result_1 = paddle.randint(low=10, shape=[3, 4])
        result_2 = paddle.randint(low=10, shape=[3, 4])
        fetches = [result_1, result_2]

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)

        first_run = exe.run(train_program, feed={}, fetch_list=fetches)
        gen.manual_seed(seed)
        second_run = exe.run(train_program, feed={}, fetch_list=fetches)

        out1_res1 = np.array(first_run[0])
        out1_res2 = np.array(first_run[1])
        out2_res1 = np.array(second_run[0])
        out2_res2 = np.array(second_run[1])

        if not core.is_compiled_with_cuda():
            print(">>>>>>> randint static >>>>>>>")
            self.assertTrue(np.allclose(out1_res1, out2_res1))
            self.assertTrue(np.allclose(out1_res2, out2_res2))
            self.assertTrue(not np.allclose(out1_res2, out1_res1))
def test_api(self):
    """Build and run a static program that passes ``shape`` to randint in every supported form."""
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # Only the upper bound is given: results are from [0, 5).
        output1 = paddle.randint(5)

        # shape is a list, dtype is 'int32'.
        output2 = paddle.randint(
            low=-100, high=100, shape=[64, 64], dtype='int32')

        # shape is a tuple, dtype is 'int64'.
        output3 = paddle.randint(
            low=-100, high=100, shape=(32, 32, 3), dtype='int64')

        # shape is a list mixing a tensor and an int, dtype is 'int32'.
        dim_1 = fluid.layers.fill_constant([1], "int64", 32)
        # dim_2 is not used below, but creating it still adds an op to the
        # program, so it is kept.
        dim_2 = fluid.layers.fill_constant([1], "int32", 50)
        output4 = paddle.randint(
            low=-100, high=100, shape=[dim_1, 5], dtype='int32')

        # shape is a tensor fed at run time, dtype is 'int64'.
        var_shape = fluid.data(name='var_shape', shape=[2], dtype="int64")
        output5 = paddle.randint(
            low=1, high=1000, shape=var_shape, dtype='int64')

        use_cuda = fluid.core.is_compiled_with_cuda()
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        feed = {'var_shape': np.array([100, 100]).astype('int64')}
        outs = exe.run(
            train_program,
            feed=feed,
            fetch_list=[output1, output2, output3, output4, output5])
def get_params(transform_num: int) -> Tuple[int, Tensor, Tensor]:
    """Draw the random parameters for one autoaugment transformation.

    Args:
        transform_num: Upper bound passed to ``paddle.randint`` when drawing
            the policy index.

    Returns:
        ``(policy_id, probs, signs)``: the policy index as a Python int, a
        tensor of two ``paddle.rand`` samples, and a tensor of two integers
        drawn with bounds 0 and 2.
    """
    # Draw order is kept: policy index, probabilities, signs.
    policy_draw = paddle.randint(low=0, high=transform_num, shape=(1, ))
    policy_id = int(policy_draw)
    probs = paddle.rand((2, ))
    signs = paddle.randint(low=0, high=2, shape=(2, ))
    return policy_id, probs, signs
def meshgrid():
    """Export a three-input ``paddle.meshgrid`` model into ``sys.argv[1]``."""
    paddle.disable_static()

    @paddle.jit.to_static
    def test_model(x, y, z):
        return paddle.meshgrid(x, y, z)

    # Three 1-D integer inputs of lengths 5, 3 and 2, drawn in that order.
    x, y, z = (
        paddle.randint(low=0, high=100, shape=[length])
        for length in (5, 3, 2)
    )

    return exportModel(
        'meshgrid', test_model, [x, y, z], target_dir=sys.argv[1])
def setup_class(cls):
    """Load the models under test and build one shared batch of random inputs.

    Sets the checkpoint paths, the parsed config, two models built without
    weights (``model_tf``, ``model_hf``), the two HuggingFace reference
    models, the model built with ``model_path``, and ``tokens_tensor``.
    """
    model_dir = "/workspace/models/nlp/chinese_wwm_ext"
    cls.config_file_path = "/workspace/models/nlp/chinese_wwm_ext/bert_config.json"
    cls.tf_checkpoint_path = "/workspace/models/nlp/chinese_wwm_ext/bert_model.ckpt"
    cls.huggingface_model_path = model_dir
    cls.model_path = "/workspace/models/nlp/chinese_wwm_ext/bert_model_pd.bin"

    model_cfg = dict(
        type="PDBertForPreTraining",
        config=dict(type="ConfigBase", json_file=cls.config_file_path),
    )
    cls.config = build_config(model_cfg["config"])

    # These two are built before "model_path" is added to the config.
    cls.model_tf = build_pd_models(model_cfg)
    cls.model_hf = build_pd_models(model_cfg)

    cls.model_base = transformers.BertModel.from_pretrained(
        cls.huggingface_model_path)
    cls.model_base.eval()
    cls.model_base_mlm = transformers.BertForPreTraining.from_pretrained(
        cls.huggingface_model_path)
    cls.model_base_mlm.eval()

    model_cfg.update({"model_path": cls.model_path})
    cls.model = build_pd_models(model_cfg)
    cls.model.eval()

    cls.batch_size = 4
    cls.seq_length = 10
    batch_shape = (cls.batch_size, cls.seq_length)

    def random_ids(low, high):
        # int64 tensor of shape (batch_size, seq_length).
        return paddle.randint(
            low=low, high=high, shape=batch_shape, dtype=paddle.int64)

    # Dict entries are evaluated top to bottom, so the draw order is fixed.
    cls.tokens_tensor = {
        "input_ids": random_ids(1, 100),
        "attention_mask": random_ids(0, 2),
        "token_type_ids": random_ids(0, 2),
        "position_ids": random_ids(0, cls.seq_length),
    }
def get_params(img: Tensor, scale: List[float],
               ratio: List[float]) -> Tuple[int, int, int, int]:
    """Pick the crop box for a random sized crop.

    Up to ten candidate boxes are sampled: the area is drawn between
    ``scale[0]`` and ``scale[1]`` times the image area, and the aspect ratio
    is drawn log-uniformly between ``ratio[0]`` and ``ratio[1]``. The first
    candidate that fits inside the image is placed at a random position. If
    none fits, a centred crop is returned whose aspect ratio is clamped to
    the range spanned by ``ratio``.

    Args:
        img (PIL Image or Tensor): Input image.
        scale (list): Range of the cropped area relative to the image area.
        ratio (list): Range of the aspect ratio of the crop.

    Returns:
        tuple: ``(i, j, h, w)`` to be passed to ``crop``.
    """
    width, height = F._get_image_size(img)
    area = height * width
    log_ratio = paddle.log(paddle.to_tensor(ratio))

    attempts = 10
    for _ in range(attempts):
        area_draw = paddle.uniform(shape=[1], min=scale[0], max=scale[1])
        target_area = area * area_draw.numpy().item()

        log_aspect = paddle.uniform(
            shape=[1], min=log_ratio[0], max=log_ratio[1])
        aspect_ratio = paddle.exp(log_aspect).numpy().item()

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        fits = 0 < w <= width and 0 < h <= height
        if not fits:
            continue
        i = paddle.randint(0, height - h + 1, shape=(1, )).numpy().item()
        j = paddle.randint(0, width - w + 1, shape=(1, )).numpy().item()
        return i, j, h, w

    # Fallback: central crop with the aspect ratio clamped to the given range.
    narrowest, widest = min(ratio), max(ratio)
    in_ratio = float(width) / float(height)
    if in_ratio < narrowest:
        w = width
        h = int(round(w / narrowest))
    elif in_ratio > widest:
        h = height
        w = int(round(h * widest))
    else:
        # The image already satisfies the range: use all of it.
        w, h = width, height

    top = (height - h) // 2
    left = (width - w) // 2
    return top, left, h, w
def get_model(self, main_prog, startup_program, rank):
    """Build a two-way split embedding whose shard is selected by ``rank``.

    Args:
        main_prog: Program that receives the ops.
        startup_program: Matching startup program.
        rank: ``0`` initialises the shard from the first half of the
            embedding rows; any other value uses the second half.

    Returns:
        ``[data_in, emb_out]``: the random index tensor and the output of
        ``paddle.distributed.split``.
    """
    with fluid.program_guard(main_prog, startup_program):
        fleet.init(is_collective=True)

        # (num_embeddings, embedding_dim) = (12, 8)
        size = (12, 8)
        num_embeddings, embedding_dim = size

        np.random.seed(2020)
        np_array = np.random.rand(num_embeddings, embedding_dim)

        paddle.seed(2020)
        data_in = paddle.randint(0, num_embeddings, shape=(10, 4))

        # Not consumed below, but declaring it still adds a variable to the
        # program, so it is kept.
        data = paddle.static.data(
            name='tindata', shape=[10, 1000], dtype="float32")

        per_part_size = num_embeddings // 2
        if rank == 0:
            shard = np_array[0:per_part_size, :]
        else:
            shard = np_array[per_part_size:num_embeddings, :]
        param_attr = paddle.fluid.ParamAttr(
            initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                shard), )

        emb_out = paddle.distributed.split(
            data_in,
            size,
            operation="embedding",
            num_partitions=2,
            weight_attr=param_attr)

        return [data_in, emb_out]
def _mask_tokens(self,
                 inputs,
                 special_tokens_mask,
                 mask_token_id,
                 token_len,
                 mlm_prob=0.15,
                 ignore_label=-100):
    """Prepare inputs and labels for masked language modeling.

    Positions are selected with probability ``mlm_prob`` (never where
    ``special_tokens_mask`` is set). Of the selected positions roughly 80%
    are replaced by ``mask_token_id``, 10% by a random id below
    ``token_len``, and 10% are left unchanged.

    Args:
        inputs: Token id tensor; it is modified in place.
        special_tokens_mask: Boolean mask of positions that must not be
            selected.
        mask_token_id: Id written at "replace with mask" positions.
        token_len: Exclusive upper bound for the random replacement ids.
        mlm_prob: Selection probability per position.
        ignore_label: Label written at every unselected position.

    Returns:
        ``(inputs, labels)``.
    """
    labels = inputs.clone()
    shape = labels.shape

    def coin_flips(prob_tensor):
        # Bernoulli sample converted to a boolean mask.
        return paddle.cast(paddle.bernoulli(prob_tensor), dtype=bool)

    select_prob = paddle.full(shape, mlm_prob)
    select_prob[special_tokens_mask] = 0
    masked_indices = coin_flips(select_prob)

    # The loss is only computed on the selected tokens.
    labels[~masked_indices] = ignore_label

    # 80% of the selected tokens become the mask token.
    indices_replaced = coin_flips(paddle.full(shape, 0.8)) & masked_indices
    inputs[indices_replaced] = mask_token_id

    # Half of the remaining 20% (10% overall) become a random token.
    indices_random = (coin_flips(paddle.full(shape, 0.5)) & masked_indices
                      & ~indices_replaced)
    random_words = paddle.randint(low=0, high=token_len, shape=shape)
    inputs[indices_random] = random_words[indices_random]

    # The last 10% keep their original token.
    return inputs, labels
def forward(self, feed_dict):
    """Compute the loss for (src, pos) pairs with in-batch negative sampling.

    Negatives are rows of the positive embeddings picked at random indices
    within the batch. The positive logits are scored against all-ones
    labels, the negative logits against all-zeros labels, and the two losses
    are averaged.

    Args:
        feed_dict: Mapping that provides ``'src'`` and ``'pos'`` inputs for
            ``self.embedding``.

    Returns:
        ``(pos_loss + neg_loss) / 2``.
    """
    src_embed = self.embedding(feed_dict['src'])
    pos_embed = self.embedding(feed_dict['pos'])

    # Sample negatives from within the batch: one column of row indices per
    # negative.
    batch_size = feed_dict['pos'].shape[0]
    neg_idx = paddle.randint(
        low=0, high=batch_size, shape=[batch_size, self.neg_num])

    column_shape = [-1, 1, self.embed_size]
    neg_columns = [
        paddle.reshape(paddle.gather(pos_embed, neg_idx[:, col]),
                       column_shape) for col in range(self.neg_num)
    ]
    neg_embed = paddle.concat(neg_columns, axis=1)

    src_embed = paddle.reshape(src_embed, column_shape)
    pos_embed = paddle.reshape(pos_embed, column_shape)

    # [batch_size, 1, 1]
    pos_logits = paddle.matmul(src_embed, pos_embed, transpose_y=True)
    # [batch_size, 1, neg_num]
    neg_logits = paddle.matmul(src_embed, neg_embed, transpose_y=True)

    pos_loss = self.loss_fn(pos_logits, paddle.ones_like(pos_logits))
    neg_loss = self.loss_fn(neg_logits, paddle.zeros_like(neg_logits))
    return (pos_loss + neg_loss) / 2
def __getitem__(self, idx):
    """Return one ``(features, multi-hot label)`` sample.

    With ``self.balance_sampling`` the class is chosen as
    ``idx % num_classes``. If that class has no local keys, classes are
    drawn at random until one with keys is found, and a key of that class
    is picked with ``random_choice``. Otherwise the key is
    ``self.local_keys[idx]``.

    With ``self.padding`` the feature matrix is padded along axis 1 when it
    is not longer than ``c['max_mel_len']`` and then cut to exactly that
    length.

    Returns:
        ``(x.T, y)``: the transposed feature matrix and a float32 vector of
        length ``c['num_classes']`` with ones at the sample's class ids.
    """
    num_classes = c['num_classes']

    if self.balance_sampling:
        cls_id = int(idx % num_classes)
        keys = self.clsidx2keys_local[cls_id]
        while len(keys) == 0:
            # Resample over the configured number of classes. This used to
            # be a hard-coded 527, which disagreed with every other use of
            # c['num_classes'] in this method.
            cls_id = int(paddle.randint(0, num_classes, (1, )))
            keys = self.clsidx2keys_local[cls_id]
        k = random_choice(keys)
    else:
        k = self.local_keys[idx]
    cls_ids = ytid2clsidx[k]

    y = np.zeros((num_classes, ), 'float32')
    for label in cls_ids:
        y[label] = 1.0

    x = self.h5_data[k][:, :]
    if self.padding:
        max_len = c['max_mel_len']
        if x.shape[1] <= max_len:
            # Padding overshoots by at least one column; the slice below
            # trims the result back to max_len.
            pad_width = max_len - x.shape[1] + 1
            x = np.pad(x, ((0, 0), (pad_width // 2, pad_width // 2 + 1)))
        x = x[:, :max_len]

    return x.T, y
def farthest_point_sample(xyz, npoint):
    """Iteratively pick ``npoint`` indices per cloud by farthest point sampling.

    Input:
        xyz: pointcloud data, [B, N, 3]
        npoint: number of samples
    Return:
        centroids: sampled pointcloud index, [B, npoint]

    The first index of every cloud is drawn at random. Each later index is
    the point whose squared distance to the nearest already-chosen point is
    largest.
    """
    B, N, _ = xyz.shape
    centroids = paddle.zeros([B, npoint])
    # Running minimum squared distance to the chosen set. It must start
    # above any real squared distance: starting at 1.0 ignored every
    # distance above 1, so the argmax below no longer picked the farthest
    # point.
    distance = paddle.ones([B, N]) * 1e10
    farthest = paddle.randint(0, N, (B,))

    # Loop-invariant host copies, converted once instead of per iteration.
    xyz_np = xyz.numpy()
    batch_indices_np = paddle.arange(B).numpy().astype('int64')

    for i in range(npoint):
        centroids[:, i] = farthest

        # Coordinates of the newest centroid of every cloud, as [B, 1, 3].
        farthest_np = farthest.numpy().astype('int64')
        centroid = xyz_np[batch_indices_np, farthest_np, :]
        centroid = paddle.to_tensor(centroid).unsqueeze(1)

        dist = paddle.sum((xyz - centroid) ** 2, -1)

        # distance = min(distance, dist), done through numpy masking.
        mask_np = (dist < distance).numpy()
        distance_np = distance.numpy()
        distance_np[mask_np] = dist.numpy()[mask_np]
        distance = paddle.to_tensor(distance_np)

        farthest = paddle.argmax(distance, -1)
    return centroids
def set_input(self, input):
    """Unpack one batch from the dataloader into the model's input lists.

    Sets ``self.D_real_inputs`` (the image tensor, plus the class ids when
    ``'class_id'`` is present), ``self.n_class``, and ``self.G_inputs``
    (fresh random generator inputs). On the first call only, it also stores
    ``self.G_fixed_inputs`` as a copy of the generator inputs; when
    ``n_class > 0`` its second entry is replaced by class ids that are
    constant within each row of ``self.samples_every_row`` samples.

    Parameters:
        input (list): include the data itself and its metadata information.
    """
    # Normalise the batch to a dict holding at least the key 'img'.
    if isinstance(input, (list, tuple)):
        input = input[0]
    if not isinstance(input, dict):
        input = {'img': input}

    self.D_real_inputs = [paddle.to_tensor(input['img'])]
    if 'class_id' in input:
        # Class-conditional input.
        self.n_class = self.nets['netG'].n_class
        self.D_real_inputs.append(
            paddle.to_tensor(input['class_id'], dtype='int64'))
    else:
        self.n_class = 0

    batch_size = self.D_real_inputs[0].shape[0]
    generator_inputs = self.nets['netG'].random_inputs(batch_size)
    if isinstance(generator_inputs, (list, tuple)):
        self.G_inputs = generator_inputs
    else:
        self.G_inputs = [generator_inputs]

    # The fixed inputs are created once and reused on every later call.
    if hasattr(self, 'G_fixed_inputs'):
        return
    self.G_fixed_inputs = list(self.G_inputs)

    if self.n_class > 0:
        # One random class per row, repeated across the row, then trimmed to
        # the batch size.
        rows_num = (batch_size - 1) // self.samples_every_row + 1
        row_classes = paddle.randint(0, self.n_class, [rows_num, 1])
        tiled = row_classes.tile([1, self.samples_every_row])
        class_ids = tiled.reshape([
            -1,
        ])[:batch_size].detach()
        self.G_fixed_inputs[1] = class_ids.detach()
def test_forward_cumsum():
    """Run ``verify_model`` on ``paddle.cumsum`` with no axis, axis 0 and axis 1."""

    @paddle.jit.to_static
    def cusum1(inputs):
        return paddle.cumsum(inputs)

    @paddle.jit.to_static
    def cusum2(inputs):
        return paddle.cumsum(inputs, axis=0)

    @paddle.jit.to_static
    def cusum3(inputs):
        return paddle.cumsum(inputs, axis=1)

    input_data = paddle.randint(0, 100, (10, 10), dtype=paddle.int32)

    # (model, inputs) pairs, verified in this order.
    cases = (
        (cusum1, [input_data]),
        (cusum1, [input_data.astype(paddle.int64)]),
        (cusum2, [input_data]),
        (cusum3, [input_data]),
    )
    for model, inputs in cases:
        verify_model(model, inputs)
def run_test_case(self):
    """Every value from the three ``randint`` entry points must lie in ``[0, n)``."""
    n = 10
    # The same function reached through three import paths.
    samples = [
        paddle.randint(n, shape=[10], dtype="int32"),
        paddle.tensor.randint(n),
        paddle.tensor.random.randint(n),
    ]
    for tensor in samples:
        for value in tensor.numpy().tolist():
            self.assertTrue(0 <= value < n)
def __getitem__(self, index):
    """Step 3: implement ``__getitem__``.

    Defines how a sample is obtained for a given ``index`` and returns a
    single (training data, label) pair. Both values are generated at random
    here; ``index`` itself is not used.

    Returns:
        ``(data, label)``: a float32 tensor of shape ``IMAGE_SIZE`` and an
        int64 label tensor.
    """
    data = paddle.uniform(IMAGE_SIZE, dtype='float32')
    # The upper bound of randint is exclusive, so CLASS_NUM (not
    # CLASS_NUM - 1) is needed for the last class to be drawn as well.
    label = paddle.randint(0, CLASS_NUM, dtype='int64')
    return data, label
def data_outputsize_error2():
    """Call ``F.maxunpool2d`` with an explicit ``output_size`` of ``[100, 100]``.

    NOTE(review): judging by the name, this helper is presumably handed to
    an ``assertRaises``-style check and is expected to fail because of the
    ``output_size`` argument. Confirm against the caller. The two notes
    below suggest it fails earlier than intended.
    """
    # NOTE(review): randint is called with ``shape`` only. Verify against the
    # paddle.randint documentation that the default low/high are accepted;
    # if they are not, this line raises before the unpool call is reached.
    data = paddle.randint(shape=[1, 1, 3, 3])
    # NOTE(review): ``shape[1, 1, 3, 4]`` subscripts a name ``shape`` that is
    # not defined in this function; ``shape=[1, 1, 3, 4]`` was presumably
    # intended.
    indices = paddle.reshape(paddle.arange(4, 40), shape[1, 1, 3, 4])
    MaxPool2D = F.maxunpool2d(data, indices, kernel_size=2, stride=2, output_size=[100, 100])
def __getitem__(self, index):
    """Return one randomly generated ``(data, label)`` sample.

    ``index`` is not used. ``data`` is a float32 tensor of shape
    ``IMAGE_SIZE`` and ``label`` is an int64 tensor.
    """
    data = paddle.uniform(IMAGE_SIZE, dtype='float32')
    # Apply data augmentation to the dataset inside `__getitem__`
    # data = self.transform(data.numpy())
    # The upper bound of randint is exclusive, so CLASS_NUM (not
    # CLASS_NUM - 1) is needed for the last class to be drawn as well.
    label = paddle.randint(0, CLASS_NUM, dtype='int64')
    return data, label
def data_format_error():
    """Call ``F.maxunpool2d`` with ``data_format="NHWC"``.

    NOTE(review): judging by the name, this helper is presumably handed to
    an ``assertRaises``-style check and is expected to fail because of the
    ``data_format`` argument. Confirm against the caller. The two notes
    below suggest it fails earlier than intended.
    """
    # NOTE(review): randint is called with ``shape`` only. Verify against the
    # paddle.randint documentation that the default low/high are accepted;
    # if they are not, this line raises before the unpool call is reached.
    data = paddle.randint(shape=[1, 1, 3, 3])
    # NOTE(review): ``shape[1, 1, 3, 4]`` subscripts a name ``shape`` that is
    # not defined in this function; ``shape=[1, 1, 3, 4]`` was presumably
    # intended.
    indices = paddle.reshape(paddle.arange(4, 40), shape[1, 1, 3, 4])
    MaxPool2D = F.maxunpool2d(data, indices, kernel_size=2, stride=2, data_format="NHWC")
def randint(low, high, size, dtype="int64", requires_grad=False):
    """Wrap ``paddle.randint`` behind a ``size`` / ``requires_grad`` signature.

    Args:
        low: Forwarded as the first positional argument.
        high: Forwarded as ``high``.
        size: Forwarded as ``shape``.
        dtype: Forwarded as ``dtype``.
        requires_grad: Inverted and forwarded as ``stop_gradient``.

    Returns:
        The paddle result wrapped in ``Tensor``.
    """
    stop_gradient = not requires_grad
    raw = paddle.randint(
        low,
        high=high,
        shape=size,
        out=None,
        dtype=dtype,
        device=None,
        stop_gradient=stop_gradient)
    return Tensor(raw)
def test_api(self):
    """In dynamic mode, all three ``randint`` entry points must stay within ``[0, n)``."""
    n = 10
    paddle.disable_static()

    # The same function reached through three import paths.
    samples = [
        paddle.randint(n, shape=[10], dtype="int32"),
        paddle.tensor.randint(n),
        paddle.tensor.random.randint(n),
    ]
    for tensor in samples:
        for value in tensor.numpy().tolist():
            self.assertTrue(0 <= value < n)

    # Switch back to static mode once the checks are done.
    paddle.enable_static()
def test_generator_randint_dygraph_1(self):
    """Test Generator seed.

    Restoring a saved generator state must reproduce the draw made right
    after the state was saved, and re-seeding must reproduce the very first
    draw. The comparisons only run on builds without CUDA.
    """
    seed = 12312321111
    fluid.enable_dygraph()
    gen = paddle.seed(seed)

    first = paddle.randint(low=1)

    saved_state = gen.get_state()
    after_save = paddle.randint(low=1)

    gen.set_state(saved_state)
    after_restore = paddle.randint(low=1)

    gen.manual_seed(seed)
    after_reseed = paddle.randint(low=1)

    x_np = first.numpy()
    x1_np = after_save.numpy()
    x2_np = after_restore.numpy()
    x3_np = after_reseed.numpy()

    if not core.is_compiled_with_cuda():
        self.assertTrue(np.allclose(x1_np, x2_np))
        self.assertTrue(np.allclose(x_np, x3_np))
def test_forward_gather_nd():
    """Run ``verify_model`` on ``paddle.gather_nd`` for inputs of rank 1 to 4."""

    class GatherNd(nn.Layer):
        """Layer that forwards its inputs to ``paddle.gather_nd``."""

        @paddle.jit.to_static
        def forward(self, x, index):
            return paddle.gather_nd(x, index)

    # (data shape, index shape) pairs.
    shape_pairs = [
        ([20], [2, 1]),
        ([8, 8], [2]),
        ([4, 5, 6], [1, 2, 3]),
        ([3, 4, 3, 5], [3]),
    ]
    for data_shape, index_shape in shape_pairs:
        x_data = paddle.rand(data_shape, dtype="float32")
        # Index values are drawn with bounds 0 and 3.
        y_data = paddle.randint(
            low=0, high=3, shape=index_shape, dtype="int64")
        verify_model(GatherNd(), [x_data, y_data])
def test_forward_logical_not():
    """Run ``verify_model`` on ``paddle.logical_not`` for boolean inputs of rank 1 to 5."""

    class LogicalNot(nn.Layer):
        """Layer that applies ``logical_not`` and casts the result to int32."""

        def __init__(self):
            super().__init__()

        @paddle.jit.to_static
        def forward(self, x):
            negated = paddle.logical_not(x)
            return negated.astype("int32")

    input_shapes = [
        [128],
        [8, 20],
        [4, 20, 3],
        [2, 3, 8, 8],
        [2, 3, 3, 9, 9],
    ]
    for shape in input_shapes:
        # Integers drawn with bounds -2 and 2, then cast to bool.
        input_data = paddle.randint(-2, 2, shape).astype("bool")
        verify_model(LogicalNot(), input_data)
def test_generator_randint_dygraph(self):
    """Test Generator seed.

    Restoring a saved generator state must reproduce the draw made right
    after the state was saved, and re-seeding must reproduce the very first
    draw. The comparisons only run when ``core.is_compiled_with_cuda()`` is
    true.
    """
    seed = 12312321111
    fluid.enable_dygraph()
    gen = paddle.seed(seed)

    def draw():
        # Ten int32 values; only ``low`` is given.
        return paddle.randint(low=10, shape=[10], dtype="int32")

    first = draw()

    saved_state = gen.get_state()
    after_save = draw()

    gen.set_state(saved_state)
    after_restore = draw()

    paddle.seed(seed)
    after_reseed = draw()

    x_np = first.numpy()
    x1_np = after_save.numpy()
    x2_np = after_restore.numpy()
    x3_np = after_reseed.numpy()

    if core.is_compiled_with_cuda():
        print(">>>>>>> randint dygraph >>>>>>>")
        self.assertTrue(np.allclose(x1_np, x2_np))
        self.assertTrue(np.allclose(x_np, x3_np))
def do_predict(args):
    """Benchmark ``FasterDecoder`` on random source tokens on the GPU.

    The decoder is built from ``args``, loaded from
    ``<args.init_from_params>/transformer.pdparams``, and run 100 times
    without gradients. The first 50 runs are warm-up; the average time of
    the last 50 is logged in milliseconds.
    """
    paddle.set_device("gpu")

    size_per_head = args.d_model // args.n_head
    # Only the first element of the returned pair is used.
    use_batch_major_op_cache, _ = get_op_cache_config(
        True, size_per_head, args.use_fp16_decoder)

    # Define model
    transformer = FasterDecoder(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.num_encoder_layers,
        num_decoder_layers=args.num_decoder_layers,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        max_out_len=args.max_out_len,
        decoder_lib=args.decoder_lib,
        use_fp16_decoder=args.use_fp16_decoder,
        use_batch_major_op_cache=use_batch_major_op_cache)

    # Load checkpoint.
    checkpoint = os.path.join(args.init_from_params, "transformer.pdparams")
    transformer.load(checkpoint)
    # Set evaluate mode
    transformer.eval()

    # Generate src_word randomly
    src_word = paddle.randint(
        0,
        args.src_vocab_size,
        shape=[args.infer_batch_size, args.max_length],
        dtype='int64')

    total_iters = 100
    warmup_iters = 50
    timed_iters = 50
    with paddle.no_grad():
        for step in range(total_iters):
            # The clock starts once the warm-up iterations are done.
            if step == warmup_iters:
                start = time.time()
            paddle.device.cuda.synchronize()
            finished_seq, finished_scores = transformer(src_word=src_word)
            paddle.device.cuda.synchronize()
        logger.info("Average test time for decoder is %f ms" %
                    ((time.time() - start) / timed_iters * 1000))