def __reader__():
    result = [[], []]
    for sample in sample_reader():
        for i, fea in enumerate(sample):
            result[i].append(fea)
        if len(result[0]) == self.batch_size:
            tensor_result = []
            for tensor in result:
                t = fluid.Tensor()
                dat = np.array(tensor, dtype='int64')
                if len(dat.shape) > 2:
                    dat = dat.reshape((dat.shape[0], dat.shape[2]))
                elif len(dat.shape) == 1:
                    dat = dat.reshape((-1, 1))
                t.set(dat, fluid.CPUPlace())
                tensor_result.append(t)
            if self.with_shuffle_batch:
                yield tensor_result
            else:
                tt = fluid.Tensor()
                neg_array = self.cs.searchsorted(
                    np.random.sample(self.neg_num))
                neg_array = np.tile(neg_array, self.batch_size)
                tt.set(
                    neg_array.reshape((self.batch_size, self.neg_num)),
                    fluid.CPUPlace())
                tensor_result.append(tt)
                yield tensor_result
            result = [[], []]
def __reader__():
    cs = np.array(weight).cumsum()
    result = [[], []]
    for sample in sample_reader():
        for i, fea in enumerate(sample):
            result[i].append(fea)
        if len(result[0]) == batch_size:
            tensor_result = []
            for tensor in result:
                t = fluid.Tensor()
                dat = np.array(tensor, dtype='int64')
                if len(dat.shape) > 2:
                    dat = dat.reshape((dat.shape[0], dat.shape[2]))
                elif len(dat.shape) == 1:
                    dat = dat.reshape((-1, 1))
                t.set(dat, fluid.CPUPlace())
                tensor_result.append(t)
            tt = fluid.Tensor()
            neg_array = cs.searchsorted(np.random.sample(args.nce_num))
            neg_array = np.tile(neg_array, batch_size)
            tt.set(
                neg_array.reshape((batch_size, args.nce_num)),
                fluid.CPUPlace())
            tensor_result.append(tt)
            yield tensor_result
            result = [[], []]
def imagenet_norm(x):
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    mean = fluid.Tensor(mean)
    mean = layers.unsqueeze(layers.unsqueeze(layers.unsqueeze(mean, 0), 2), 3)
    std = fluid.Tensor(std)
    std = layers.unsqueeze(layers.unsqueeze(layers.unsqueeze(std, 0), 2), 3)
    return (x - mean) / std
def imagenet_norm(x):
    mean = fluid.Tensor()
    mean.set(np.array([0.485, 0.456, 0.406], dtype='float32'), fluid.CUDAPlace(0))
    std = fluid.Tensor()
    std.set(np.array([0.229, 0.224, 0.225], dtype='float32'), fluid.CUDAPlace(0))
    mean = unsqueeze(mean, axes=0)
    mean = unsqueeze(mean, axes=2)
    mean = unsqueeze(mean, axes=3)
    std = unsqueeze(std, axes=0)
    std = unsqueeze(std, axes=2)
    std = unsqueeze(std, axes=3)
    return (x - mean) / std
def set_inputs(self, place): """Set the randomly generated data to the test program. """ self.inputs = {} queries = fluid.Tensor() queries.set(self.queries, place) keys = fluid.Tensor() keys.set(self.keys, place) self.inputs["keys"] = keys self.inputs["queries"] = queries
def func_create_varbase(self):
    x = np.ones([2, 2], np.float32)
    y = np.zeros([3, 3], np.float32)
    t = fluid.Tensor()
    t.set(x, fluid.CPUPlace())
    if not _in_legacy_dygraph():
        egr_tmp = fluid.core.eager.Tensor(value=x, place=fluid.core.CPUPlace())
        egr_tmp2 = fluid.core.eager.Tensor(y, fluid.core.CPUPlace())
        egr_tmp3 = paddle.to_tensor(x)
        egr_tmp4 = fluid.core.eager.Tensor(y)
        egr_tmp5 = fluid.core.eager.Tensor(value=x)
        egr_tmp6 = fluid.core.eager.Tensor(t)
        self.assertTrue(np.array_equal(x, egr_tmp.numpy()))
        self.assertTrue(np.array_equal(y, egr_tmp2.numpy()))
        self.assertTrue(np.array_equal(x, egr_tmp3.numpy()))
        self.assertTrue(np.array_equal(y, egr_tmp4.numpy()))
        self.assertTrue(np.array_equal(x, egr_tmp5.numpy()))
        self.assertTrue(np.array_equal(x, egr_tmp6.numpy()))
    else:
        tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
        tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
        tmp3 = paddle.to_tensor(x)
        tmp4 = fluid.core.VarBase(y)
        tmp5 = fluid.core.VarBase(value=x)
        tmp6 = fluid.core.VarBase(t)
        self.assertTrue(np.array_equal(x, tmp.numpy()))
        self.assertTrue(np.array_equal(y, tmp2.numpy()))
        self.assertTrue(np.array_equal(x, tmp3.numpy()))
        self.assertTrue(np.array_equal(y, tmp4.numpy()))
        self.assertTrue(np.array_equal(x, tmp5.numpy()))
        self.assertTrue(np.array_equal(x, tmp6.numpy()))
def __reader__():
    if is_hs:
        result = [[], [], [], []]
    else:
        result = [[], []]
    for sample in sample_reader():
        for i, fea in enumerate(sample):
            result[i].append(fea)
        if len(result[0]) == batch_size:
            tensor_result = []
            for tensor in result:
                t = fluid.Tensor()
                dat = np.array(tensor, dtype='int64')
                if len(dat.shape) > 2:
                    dat = dat.reshape((dat.shape[0], dat.shape[2]))
                elif len(dat.shape) == 1:
                    dat = dat.reshape((-1, 1))
                t.set(dat, fluid.CPUPlace())
                tensor_result.append(t)
            yield tensor_result
            if is_hs:
                result = [[], [], [], []]
            else:
                result = [[], []]
def func_create_varbase(self):
    x = np.ones([2, 2], np.float32)
    y = np.zeros([3, 3], np.float32)
    t = fluid.Tensor()
    t.set(x, fluid.CPUPlace())
    if _in_eager_mode():
        # TODO(jiabin): Support Kwargs and uncomment these tests
        # egr_tmp = fluid.core.eager.EagerTensor(value=x, place=fluid.core.CPUPlace())
        egr_tmp2 = fluid.core.eager.EagerTensor(y, fluid.core.CPUPlace())
        egr_tmp3 = paddle.to_tensor(x)
        egr_tmp4 = fluid.core.eager.EagerTensor(y)
        # egr_tmp5 = fluid.core.eager.EagerTensor(value=x)
        # TODO(jiabin): Support it when we merge LoDTensor with DenseTensor
        egr_tmp6 = fluid.core.eager.EagerTensor(t)
        # self.assertTrue(np.array_equal(x, egr_tmp.numpy()))
        self.assertTrue(np.array_equal(y, egr_tmp2.numpy()))
        self.assertTrue(np.array_equal(x, egr_tmp3.numpy()))
        self.assertTrue(np.array_equal(y, egr_tmp4.numpy()))
        # self.assertTrue(np.array_equal(x, egr_tmp5.numpy()))
        self.assertTrue(np.array_equal(x, egr_tmp6.numpy()))
    else:
        tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
        tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
        tmp3 = paddle.to_tensor(x)
        tmp4 = fluid.core.VarBase(y)
        tmp5 = fluid.core.VarBase(value=x)
        tmp6 = fluid.core.VarBase(t)
        self.assertTrue(np.array_equal(x, tmp.numpy()))
        self.assertTrue(np.array_equal(y, tmp2.numpy()))
        self.assertTrue(np.array_equal(x, tmp3.numpy()))
        self.assertTrue(np.array_equal(y, tmp4.numpy()))
        self.assertTrue(np.array_equal(x, tmp5.numpy()))
        self.assertTrue(np.array_equal(x, tmp6.numpy()))
def set_inputs(self, place): """Set the randomly generated data to the test program. """ self.inputs = {} tensor = fluid.Tensor() tensor.set(self.data, place) self.inputs[self.data_desc["name"]] = tensor
def set_inputs(self, place):
    self.inputs = {}
    for desc in self.data_desc:
        tensor = fluid.Tensor()
        tensor.set(self.data[desc[0]][0], place)
        if self.data[desc[0]][1]:
            tensor.set_lod(self.data[desc[0]][1])
        self.inputs[desc[0]] = tensor
def set_inputs(self, place):
    self.inputs = {}
    for desc in self.data_desc:
        tensor = fluid.Tensor()
        tensor.set(self.data[desc[0]][0], place)
        if self.data[desc[0]][1]:
            tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
        self.inputs[desc[0]] = tensor
def to_tensor(data, place):
    """Convert data to a paddle tensor."""
    flattened_data = np.concatenate(data, axis=0).astype("float32")
    flattened_data = flattened_data.reshape([-1, 768])
    res = fluid.Tensor()
    res.set(flattened_data, place)
    return res
def paddle_new_tensor(gpu_id, init_flag, realloc_flag, num_realloc):
    init_mb = init_flag - LEFT_GPU_MB
    realloc_mb = realloc_flag - LEFT_GPU_MB
    tensor = fluid.Tensor()
    tensor.set(np.random.rand(init_mb * MB_TO_NUM_NP), fluid.CUDAPlace(gpu_id))
    print("Init alloc %d MB, gpu usage report from fluid: %d" %
          (init_mb, fluid.core.get_mem_usage(gpu_id)))
    # Sleep and wait for the nvidia-smi subprocess to collect GPU usage
    time.sleep(COLLECT_GPU_MEM_USAGE_LOOP_MS / 1000)
    # Pre-create the tensors instead of reusing one inside the loop,
    # otherwise each tensor would be recycled before the next allocation
    re_tensor = [fluid.Tensor() for i in range(num_realloc)]
    for i in range(num_realloc):
        re_tensor[i].set(
            np.random.rand(realloc_mb * MB_TO_NUM_NP), fluid.CUDAPlace(gpu_id))
        print("Re-alloc %d MB, gpu usage report from fluid: %d" %
              (realloc_mb, fluid.core.get_mem_usage(gpu_id)))
        # Sleep and wait for the nvidia-smi subprocess to collect GPU usage
        time.sleep(COLLECT_GPU_MEM_USAGE_LOOP_MS / 1000)
    time.sleep(COLLECT_GPU_MEM_USAGE_LOOP_MS / 1000)
def __init__(self, ernie, conf, tokenizer, hidden, layer_n, device):
    super(SoftMaskedErnie, self).__init__()
    self.embedding = ernie.word_emb
    self.config = conf
    embedding_size = self.config['hidden_size']
    self.detector = F.layers.gru_unit(self.embedding, hidden, hidden * 3)
    self.corrector = ernie.encoder_stack
    t = F.Tensor()
    mask_token_id = tokenizer.mask_id
    self.mask_e = self.embedding(mask_token_id)
    self.linear = Linear(embedding_size, self.config['vocab_size'])
    self.softmax = fluid.layers.log_softmax(self.linear)
def to_lodtensor(data, place):
    """Convert data in a list into a LoDTensor."""
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.Tensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
def imagenet_norm(x):
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    t = fluid.Tensor()
    t.set(np.array(mean, dtype='float32'), fluid.CPUPlace())
    mean = fluid.dygraph.to_variable(t)
    mean = unsqueeze(mean, axes=0)
    mean = unsqueeze(mean, axes=2)
    mean = unsqueeze(mean, axes=3)
    t = fluid.Tensor()
    t.set(np.array(std, dtype='float32'), fluid.CPUPlace())
    std = fluid.dygraph.to_variable(t)
    std = unsqueeze(std, axes=0)
    std = unsqueeze(std, axes=2)
    std = unsqueeze(std, axes=3)
    return (x - mean) / std
def test_tensor_set_int16(self):
    array = numpy.random.randint(100, size=(300, 500)).astype("int16")
    tensor = fluid.Tensor()
    place = core.CPUPlace()
    tensor.set(array, place)
    self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
    self.assertTrue(numpy.array_equal(numpy.array(tensor), array))
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
        self.assertTrue(numpy.array_equal(numpy.array(tensor), array))
        place = core.CUDAPinnedPlace()
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT16)
        self.assertTrue(numpy.array_equal(numpy.array(tensor), array))
def test_tensor_set_from_array_list(self):
    array = numpy.random.randint(1000, size=(200, 300))
    list_array = [array, array]
    tensor = fluid.Tensor()
    place = core.CPUPlace()
    tensor.set(list_array, place)
    self.assertEqual([2, 200, 300], tensor.shape())
    self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        tensor.set(list_array, place)
        self.assertEqual([2, 200, 300], tensor.shape())
        self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))
        place = core.CUDAPinnedPlace()
        tensor.set(list_array, place)
        self.assertEqual([2, 200, 300], tensor.shape())
        self.assertTrue(numpy.array_equal(numpy.array(tensor), list_array))
def test_create_VarBase(self):
    x = np.ones([2, 2], np.float32)
    y = np.zeros([3, 3], np.float32)
    t = fluid.Tensor()
    t.set(x, fluid.CPUPlace())
    with fluid.dygraph.guard():
        tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
        tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
        tmp3 = fluid.dygraph.base.to_variable(x)
        tmp4 = fluid.core.VarBase(y)
        tmp5 = fluid.core.VarBase(value=x)
        tmp6 = fluid.core.VarBase(t)
        self.assertTrue(np.array_equal(x, tmp.numpy()))
        self.assertTrue(np.array_equal(y, tmp2.numpy()))
        self.assertTrue(np.array_equal(x, tmp3.numpy()))
        self.assertTrue(np.array_equal(y, tmp4.numpy()))
        self.assertTrue(np.array_equal(x, tmp5.numpy()))
        self.assertTrue(np.array_equal(x, tmp6.numpy()))
def run_slice_tensor(self, place):
    tensor = fluid.Tensor()
    shape = [3, 3, 3]
    tensor._set_dims(shape)
    tensor_array = numpy.array(
        [[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
         [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
         [[19, 20, 21], [22, 23, 24], [25, 26, 27]]])
    tensor.set(tensor_array, place)
    n1 = tensor[1]
    t1 = tensor_array[1]
    self.assertTrue((numpy.array(n1) == numpy.array(t1)).all())
    n2 = tensor[1:]
    t2 = tensor_array[1:]
    self.assertTrue((numpy.array(n2) == numpy.array(t2)).all())
    n3 = tensor[0:2:]
    t3 = tensor_array[0:2:]
    self.assertTrue((numpy.array(n3) == numpy.array(t3)).all())
    n4 = tensor[2::-2]
    t4 = tensor_array[2::-2]
    self.assertTrue((numpy.array(n4) == numpy.array(t4)).all())
    n5 = tensor[2::-2][0]
    t5 = tensor_array[2::-2][0]
    self.assertTrue((numpy.array(n5) == numpy.array(t5)).all())
    n6 = tensor[2:-1:-1]
    t6 = tensor_array[2:-1:-1]
    self.assertTrue((numpy.array(n6) == numpy.array(t6)).all())
    n7 = tensor[0:, 0:]
    t7 = tensor_array[0:, 0:]
    self.assertTrue((numpy.array(n7) == numpy.array(t7)).all())
    n8 = tensor[0::1, 0::-1, 2:]
    t8 = tensor_array[0::1, 0::-1, 2:]
    self.assertTrue((numpy.array(n8) == numpy.array(t8)).all())
def to_feed(self, place):
    feed_dict = dict()
    for iname in self.inputs:
        lod = [0]
        np_flatten = []
        for seq_id in xrange(len(self.inputs[iname])):
            seq_len = len(self.inputs[iname][seq_id])
            lod.append(lod[-1] + seq_len)
            np_flatten.extend(self.inputs[iname][seq_id])
        t = fluid.Tensor()
        t.set(numpy.array(np_flatten), place)
        t.set_lod([lod])
        feed_dict[iname] = t
    for pname in self.params:
        feed_dict[pname] = self.params[pname]
    return feed_dict
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
data = np.random.random([16]).astype('float32')
res = fluid.Tensor()
res.set(data, place)
print(res.shape()[0])
        ])  # We can specify head_mask for each layer
        head_mask = head_mask.to(
            dtype=next(self.parameters()).dtype)  # switch to float if needed + fp16 compatibility
    else:
        head_mask = [None] * self.config.num_hidden_layers
    return input_ids, position_ids, token_type_ids, inputs_embeds, \
        extended_attention_mask, head_mask, encoder_hidden_states, encoder_extended_attention_mask


if __name__ == "__main__":
    parser = propeller.ArgumentParser('model with ERNIE')
    parser.add_argument('--from_pretrained', type=str, required=True)
    parser.add_argument('--conf', type=str, required=True)
    args = parser.parse_args()
    cfg_file_path = os.path.join(args.conf, 'ernie_config.json')
    hparams_cli = propeller.parse_hparam(args)
    hparams_config_file = json.loads(open(cfg_file_path).read())
    tokenizer = ErnieTokenizer.from_pretrained(args.from_pretrained)
    D.guard().__enter__()  # activate paddle `dygraph` mode
    ernie = ErnieModel.from_pretrained(args.from_pretrained)
    model = SoftMaskedErnie(ernie, hparams_config_file, tokenizer, 2, 1, 'cpu')
    text = '中国的'
    token = tokenizer.tokenize(text)
    ids = tokenizer.convert_tokens_to_ids(token)
    print(ids)
    input_mask = fluid.Tensor([[1, 1, 0]])
    segment_ids = fluid.Tensor([[0, 0, 0]])
    out = model(ids, input_mask, segment_ids)
    print(out)
def _net_conf_at(word, sent_mask, intent_label, gates_label, slots, sent_mask1,
                 generates_label):
    """Configure the network."""
    all_word = fluid.Tensor()
    all_word.set(
        np.array([i for i in range(data_processor.get_vocab_size('utterance'))]),
        fluid.CPUPlace())
    all_word_emb = fluid.embedding(
        input=all_word,
        size=[
            data_processor.get_vocab_size('utterances'), args['word_emb_dim']
        ],
        param_attr=fluid.ParamAttr(
            name='word_emb',
            initializer=fluid.initializer.Normal(0., args['word_emb_dim']**-0.5)))
    cat_list = []
    for batch_word in word:
        cat_list.append(
            fluid.layers.gather(input=all_word_emb, index=batch_word))
    word_emb = fluid.layers.concat(cat_list, axis=0)
    word_emb = fluid.layers.scale(x=word_emb, scale=args['word_emb_dim']**0.5)
    if args['dropout'] > 0.00001:
        word_emb = fluid.layers.dropout(
            word_emb, dropout_prob=args['dropout'], seed=None, is_test=False)
    input_feature = word_emb
    bigru_output, bigru_last_h = _bigru_layer(input_feature)
    if args['debug']:
        bigru_out = fluid.layers.Print(input=bigru_output, message='bigru_output: ')
    # mask padding tokens
    sent_mask_r = fluid.layers.reverse(sent_mask, -1)
    sent_mask_cat = fluid.layers.concat([sent_mask, sent_mask_r])
    sent_mask_cat = fluid.layers.cast(sent_mask_cat, 'float32')
    # bigru_output = fluid.layers.elementwise_mul(bigru_output, sent_mask, axis=0)
    bigru_output = fluid.layers.elementwise_mul(bigru_output, sent_mask_cat, axis=0)
    sent_rep = fluid.layers.reduce_max(input=bigru_output, dim=-2, keep_dim=False)
    # sent_rep = fluid.layers.reduce_mean(input=bigru_output, dim=-2, keep_dim=False)
    if args['debug']:
        sent_rep = fluid.layers.Print(input=sent_rep, message='sent_rep: ')
    sent_fc = fluid.layers.fc(
        input=sent_rep,
        size=data_processor.get_vocab_size('domain'),
        param_attr=fluid.ParamAttr(
            learning_rate=1.0,
            trainable=True,
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
    if args['debug']:
        sent_fc = fluid.layers.Print(input=sent_fc, message='sent_fc: ')
    ce_loss, intent_probs = fluid.layers.softmax_with_cross_entropy(
        logits=sent_fc, label=intent_label, return_softmax=True)

    ################ slot #########################
    slot_emb = fluid.embedding(
        input=slots,
        size=[data_processor.get_vocab_size('slot'), args['slot_emb_dim']],
        param_attr=fluid.ParamAttr(
            name='slot_emb',
            initializer=fluid.initializer.Normal(0., args['slot_emb_dim']**-0.5)))
    slot_emb = fluid.layers.scale(x=slot_emb, scale=args['slot_emb_dim']**0.5)
    # words = [i for i in range(data_processor.get_vocab_size('utterance'))]
    gate_prob, generate_prob = _slot_gate(
        encoder_outs=bigru_output,
        encoder_last_h=bigru_last_h,
        slots_embedding=slot_emb,
        sent_mask=sent_mask1,
        word_emb=word_emb,
        story=word)
    gates_label1 = fluid.layers.transpose(gates_label, perm=[1, 0])
    gates_label1 = fluid.layers.reshape(
        gates_label1, shape=[args['batch_size'] * args['all_slot_num'], -1])
    generates_label1 = fluid.layers.reshape(
        generates_label, shape=[args['batch_size'], args['all_slot_num'], -1])
    generates_label1 = fluid.layers.transpose(generates_label1, perm=[1, 0])
    generates_label1 = fluid.layers.reshape(
        generates_label1, shape=[args['batch_size'] * args['all_slot_num'], -1])
    generate_prob = fluid.layers.transpose(generate_prob, perm=[0, 1])
    ############## slot end #########################
    # loss = fluid.layers.mean(x=ce_loss)
    # accuracy = fluid.layers.accuracy(input=intent_probs, label=intent_label)
    # if args['debug']:
    #     print('loss: %s, intent_probs: %s' % (str(loss.shape), str(intent_probs.shape)))
    #     intent_probs = fluid.layers.Print(intent_probs, message='intent_probs: ', summarize=-1)
    gate_acc, gate_loss, generate_acc, generate_loss = get_slot_acc(
        gate_prob=gate_prob,
        gates_label=gates_label1,
        generate_prob=generate_prob,
        generates_label=generates_label1)
    ########## choose loss and acc ###############
    # loss = gate_loss
    # accuracy = gate_acc
    ########## choose loss and acc end ##########
    return gate_loss, gate_acc, intent_probs, gate_prob, generate_loss, generate_acc
def test_tensor_to_variable(self):
    with fluid.dygraph.guard():
        t = fluid.Tensor()
        t.set(np.random.random((1024, 1024)), fluid.CPUPlace())
        var = fluid.dygraph.to_variable(t)
        self.assertTrue(np.array_equal(t, var.numpy()))