def nucleus_sample(self, text, max_len=32, end_word=None,
                   repetition_penalty=1.0, temperature=1.0, top_k=0,
                   top_p=1.0):
    with paddle.no_grad():
        # Build the stop condition from the end word, if given.
        if end_word is not None:
            stop_id = self.tokenizer.encode(end_word)
            length = len(stop_id)

        # Initial prediction over the full prompt.
        ids = self.tokenizer.encode(text)
        input_id = paddle.to_tensor(
            np.array(ids).reshape(1, -1).astype('int64'))
        output, cached_kvs = self.model(input_id, use_cache=True)
        next_token_logits = output[0, -1, :]
        for id in set(ids):
            next_token_logits[id] /= repetition_penalty
        next_token_logits = next_token_logits / temperature
        next_token_logits[self.tokenizer.encoder['<unk>']] = -float('Inf')
        filtered_logits = self.top_k_top_p_filtering(
            next_token_logits, top_k=top_k, top_p=top_p)
        next_token = paddle.multinomial(
            paddle.nn.functional.softmax(filtered_logits, axis=-1),
            num_samples=1).numpy()
        ids += [int(next_token)]

        # Continue predicting with the cached key/value states.
        for i in range(max_len - 1):
            input_id = paddle.to_tensor(
                np.array([next_token]).reshape(1, -1).astype('int64'))
            output, cached_kvs = self.model(
                input_id, use_cache=True, cache=cached_kvs)
            next_token_logits = output[0, -1, :]
            for id in set(ids):
                next_token_logits[id] /= repetition_penalty
            next_token_logits = next_token_logits / temperature
            next_token_logits[self.tokenizer.encoder['<unk>']] = -float('Inf')
            filtered_logits = self.top_k_top_p_filtering(
                next_token_logits, top_k=top_k, top_p=top_p)
            next_token = paddle.multinomial(
                paddle.nn.functional.softmax(filtered_logits, axis=-1),
                num_samples=1).numpy()
            ids += [int(next_token)]

            # Stop once the generated tail matches the end word's ids.
            if (end_word is not None) and (ids[-length:] == stop_id):
                break
    return self.tokenizer.decode(ids)
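The sampler above relies on a `top_k_top_p_filtering` helper that is not shown. Below is a minimal standalone sketch of the usual formulation for a 1-D logits tensor; the function name matches the call above, but the body, the `filter_value` default, and the exact masking details are assumptions, not the original implementation:

import paddle
import paddle.nn.functional as F

def top_k_top_p_filtering(logits, top_k=0, top_p=1.0,
                          filter_value=-float('Inf')):
    # Filter a [vocab_size] logits tensor with top-k and/or nucleus
    # (top-p) filtering; filtered positions are set to -inf so that
    # softmax assigns them zero probability.
    if top_k > 0:
        top_k = min(top_k, logits.shape[-1])
        # Drop every logit smaller than the k-th largest one.
        kth_value = paddle.topk(logits, k=top_k)[0][-1]
        logits = paddle.where(logits < kth_value,
                              paddle.full_like(logits, filter_value),
                              logits)
    if top_p < 1.0:
        sorted_logits = paddle.sort(logits, descending=True)
        sorted_indices = paddle.argsort(logits, descending=True)
        cumulative_probs = paddle.cumsum(
            F.softmax(sorted_logits, axis=-1), axis=-1)
        # Mark tokens past the nucleus, shifting right so the first
        # token that crosses top_p is still kept.
        sorted_mask = paddle.cast(cumulative_probs > top_p, 'int64')
        sorted_mask[1:] = sorted_mask[:-1].clone()
        sorted_mask[0] = 0
        # Scatter the sorted mask back to vocabulary order.
        remove_mask = paddle.scatter(paddle.zeros_like(sorted_mask),
                                     sorted_indices, sorted_mask)
        logits = paddle.where(paddle.cast(remove_mask, 'bool'),
                              paddle.full_like(logits, filter_value),
                              logits)
    return logits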
def test_fixed_random_number(self):
    # Test the fixed random numbers generated on GPU by
    # 'curandStatePhilox4_32_10_t'.
    if not paddle.is_compiled_with_cuda():
        return

    # Different GPUs generate different random values. Only test V100 here.
    if "V100" not in paddle.device.cuda.get_device_name():
        return

    print("Test Fixed Random number on V100 GPU------>")
    paddle.disable_static()
    paddle.set_device('gpu')
    paddle.seed(100)

    x = paddle.randint(0, 100, [1024, 10000]).astype('float32')
    y = paddle.multinomial(x, 1, replacement=False).numpy()
    self.assertEqual(np.sum(y), 5187793)
    self.assertEqual(np.mean(y), 5066.2041015625)
    expect = [9982, 1655, 4741, 1323, 9319, 3298, 6473, 7477, 2507, 2628]
    self.assertTrue(np.array_equal(y[100:110, :].flatten(), expect))

    y = paddle.multinomial(x, 5000, replacement=False).numpy()
    self.assertEqual(np.sum(y), 25603962316)
    self.assertEqual(np.mean(y), 5000.77388984375)
    expect = [7300, 6055, 8714, 5401, 7360, 161, 5035, 7002, 6788, 2916]
    self.assertTrue(np.array_equal(y[100, 1000:1010], expect))

    y = paddle.multinomial(x, 5000, replacement=False).numpy()
    self.assertEqual(np.sum(y), 25592855710)
    self.assertEqual(np.mean(y), 4998.604630859375)
    expect = [5700, 6567, 4399, 5688, 7472, 545, 6894, 526, 2124, 385]
    self.assertTrue(np.array_equal(y[300, 3000:3010], expect))

    y = paddle.multinomial(x, 20000, replacement=True).numpy()
    self.assertEqual(np.sum(y), 102371362581)
    self.assertEqual(np.mean(y), 4998.60168852539)
    self.assertEqual(np.std(y), 2886.316308500771)
    expect = [7630, 8235, 8445, 3275, 5580, 4591, 1331, 342, 1662, 7156]
    self.assertTrue(np.array_equal(y[100, 0:10], expect))

    y = paddle.multinomial(x, 20000, replacement=True).numpy()
    self.assertEqual(np.sum(y), 102400672117)
    self.assertEqual(np.mean(y), 5000.032818212891)
    self.assertEqual(np.std(y), 2886.913426124017)
    expect = [4159, 7849, 9305, 5759, 4422, 122, 345, 2897, 5200, 5911]
    self.assertTrue(np.array_equal(y[100, 0:10], expect))

    paddle.enable_static()
def test_alias(self):
    paddle.set_device('npu:0')
    x = paddle.rand([4])
    out1 = paddle.multinomial(x, num_samples=10, replacement=True)
    out2 = paddle.tensor.multinomial(x, num_samples=10, replacement=True)
    out3 = paddle.tensor.random.multinomial(
        x, num_samples=10, replacement=True)
def sample(self, labels):
    """Randomly sample `n_sample` negative samples."""
    n_sample = self.n_sample
    n_tries = 1 * n_sample  # kept for parity with other samplers; unused here
    neg_samples = paddle.multinomial(
        self.new_prob,
        num_samples=n_sample,
        replacement=self.unique is False)
    true_log_probs = paddle.gather(self.log_q, labels)
    samp_log_probs = paddle.gather(self.log_q, neg_samples)
    return true_log_probs, samp_log_probs, neg_samples
def decode(self, inputs, caches):
    tgt_ids = inputs['tgt_ids']
    tgt_pos = inputs['tgt_pos']
    tgt_generation_mask = inputs['tgt_generation_mask']
    predictions = tgt_ids

    # TODO
    step = 0
    while step < self.max_dec_len:
        # [-1, 1]
        append_mask = paddle.cast(
            tgt_ids != self.eos_id, dtype=tgt_generation_mask.dtype)
        tgt_generation_mask = paddle.concat(
            [tgt_generation_mask, paddle.unsqueeze(append_mask, 1)],
            axis=-1)
        tgt_sent = paddle.ones(
            [tgt_generation_mask.shape[0], 1], dtype=tgt_ids.dtype)

        # [-1, 1, hidden_size]
        out, caches = self.plato2_encoder(caches, tgt_ids, tgt_sent,
                                          tgt_pos, tgt_generation_mask)
        out = paddle.squeeze(out, axis=1)

        # [-1, hidden_size]
        trans = self.logits_fc_layer(out)
        trans = self.gelu_layer(trans)
        trans = self.logits_layer_norm(trans)

        # [-1, vocab_size]
        logits = paddle.matmul(
            trans,
            self.plato2_encoder.word_embedding_layer.weight,
            transpose_y=True) + self.logits_bias
        logits[:, self.unk_id] = -1e9
        logits[:, self.bos_id] = -1e9
        logits[:, self.mask_id] = -1e9
        if step < self.min_dec_len:
            logits[:, self.eos_id] = -1e9
        logits = logits * append_mask + (1 - append_mask) * self.after_eos
        probs = self.softmax(logits)

        # [-1, topk]
        topk_probs, _ = paddle.topk(probs, k=self.topk)
        mask = paddle.cast(probs >= topk_probs[:, -1:], 'float32')
        sums = paddle.sum(topk_probs, axis=-1, keepdim=True)
        new_probs = probs * mask / sums
        # [-1, 1]
        sampling_ids = paddle.multinomial(new_probs)

        step = step + 1
        tgt_ids = sampling_ids
        tgt_pos = tgt_pos + 1
        predictions = paddle.concat([predictions, tgt_ids], axis=1)
    return predictions
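The top-k renormalize-and-sample step in the decoder above can be exercised on its own; a toy sketch with made-up shapes (not part of the original module):

import paddle
import paddle.nn.functional as F

paddle.seed(0)
probs = F.softmax(paddle.randn([2, 8]), axis=-1)  # [batch, vocab]
topk_probs, _ = paddle.topk(probs, k=3)
# Keep only entries at least as large as the 3rd-largest per row,
# then renormalize by the top-k mass.
mask = paddle.cast(probs >= topk_probs[:, -1:], 'float32')
new_probs = probs * mask / paddle.sum(topk_probs, axis=-1, keepdim=True)
sampling_ids = paddle.multinomial(new_probs)  # default num_samples=1
print(sampling_ids.shape)  # [2, 1]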
def test_dygraph3(self):
    # When replacement is False, the number of samples must be no larger
    # than the number of categories.
    paddle.disable_static()
    x_numpy = np.random.rand(1000)
    x = paddle.to_tensor(x_numpy)
    out = paddle.multinomial(x, num_samples=100, replacement=False)

    unique_out = np.unique(out.numpy())
    self.assertEqual(
        len(unique_out), 100,
        "replacement is False. categories can't be sampled repeatedly")
    paddle.enable_static()
def test_input_probs_dim(self):
    def test_dim_larger_than_2():
        x = paddle.rand([2, 3, 3])
        paddle.multinomial(x)

    self.assertRaises(ValueError, test_dim_larger_than_2)

    def test_dim_less_than_1():
        x_np = np.random.random([])
        x = paddle.to_tensor(x_np)
        paddle.multinomial(x)

    self.assertRaises(ValueError, test_dim_less_than_1)

    # Negative probabilities are invalid.
    with self.assertRaises(ValueError):
        y = paddle.multinomial(paddle.to_tensor([1., 2., -3.]))

    with self.assertRaises(ValueError):
        prob = paddle.rand([20, 1000])
        # Zeroing a row leaves it with no valid distribution to sample from.
        prob[1, :] = 0
        y = paddle.multinomial(prob)
def test_dygraph(self):
    # The input probability is a vector, and replacement is True.
    paddle.disable_static()
    x_numpy = np.random.rand(4)
    x = paddle.to_tensor(x_numpy)
    out = paddle.multinomial(x, num_samples=100000, replacement=True)
    paddle.enable_static()

    sample_prob = sample_output_one_dimension(out.numpy(), 4)
    prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True)
    self.assertTrue(
        np.allclose(sample_prob, prob, rtol=0, atol=0.01),
        "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob))
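The tests above and below call a `sample_output_one_dimension` helper whose definition is not shown here. One plausible implementation, consistent with how the tests use it (the body is an assumption): it turns the drawn indices into an empirical distribution over the categories.

import numpy as np

def sample_output_one_dimension(out, dim):
    # Count how often each category index was drawn, then normalize the
    # counts into an empirical probability distribution of length `dim`.
    sample_prob = np.zeros(dim).astype('float32')
    indices, counts = np.unique(out, return_counts=True)
    sample_prob[indices] = counts
    return sample_prob / sample_prob.sum()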
def sample(self, labels):
    n_sample = self.n_sample
    n_tries = 2 * n_sample
    batch_size = labels.shape[0]

    with paddle.no_grad():
        # Over-sample with replacement, then deduplicate to obtain the
        # negative candidates.
        neg_samples = paddle.unique(
            paddle.multinomial(self.dist, n_tries, replacement=True))
        true_log_probs = paddle.gather(self.log_q, labels.flatten())
        true_log_probs = paddle.reshape(
            true_log_probs, shape=[batch_size, -1])
        samp_log_probs = paddle.gather(self.log_q, neg_samples)
    return true_log_probs, samp_log_probs, neg_samples
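A standalone illustration of the over-sample-and-deduplicate trick above (seed and sizes are arbitrary): drawing `2 * n_sample` ids with replacement and passing them through `paddle.unique` yields distinct negatives, though possibly fewer than `n_sample` of them.

import paddle

paddle.seed(0)
dist = paddle.rand([50])  # unnormalized sampling weights
n_sample = 8
neg_samples = paddle.unique(
    paddle.multinomial(dist, 2 * n_sample, replacement=True))
print(neg_samples.numpy())  # sorted distinct ids, at most 16 of them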
def test_static(self):
    paddle.set_device('npu:0')
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        x = fluid.data('x', shape=[4], dtype='float32')
        out = paddle.multinomial(x, num_samples=100000, replacement=True)

    place = fluid.NPUPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    x_np = np.random.rand(4).astype('float32')
    out = exe.run(train_program, feed={'x': x_np}, fetch_list=[out])

    sample_prob = sample_output_one_dimension(out, 4)
    prob = x_np / x_np.sum(axis=-1, keepdims=True)
    self.assertTrue(
        np.allclose(sample_prob, prob, rtol=0, atol=0.01),
        "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob))
def forward(self, trg):
    # Sample the latent code z from the prior and map it to the
    # decoder's initial hidden/cell states.
    latent_z = paddle.normal(shape=(trg.shape[0], self.latent_size))
    dec_first_hidden_cell = self.fc(latent_z)
    dec_first_hidden, dec_first_cell = paddle.split(
        dec_first_hidden_cell, 2, axis=-1)
    if self.num_layers > 1:
        dec_first_hidden = paddle.split(dec_first_hidden, self.num_layers)
        dec_first_cell = paddle.split(dec_first_cell, self.num_layers)
    else:
        dec_first_hidden = [dec_first_hidden]
        dec_first_cell = [dec_first_cell]
    dec_initial_states = [
        [h, c] for h, c in zip(dec_first_hidden, dec_first_cell)
    ]

    # Sample a word id from the decoder's output distribution and
    # return its one-hot encoding.
    output_fc = lambda x: F.one_hot(
        paddle.multinomial(
            F.softmax(paddle.squeeze(self.decoder.output_fc(x), [1]))),
        num_classes=self.vocab_size)

    latent_z = nn.BeamSearchDecoder.tile_beam_merge_with_batch(
        latent_z, self.beam_size)
    decoder = nn.BeamSearchDecoder(
        cell=self.decoder.lstm.cell,
        start_token=self.start_token,
        end_token=self.end_token,
        beam_size=self.beam_size,
        embedding_fn=self.decoder.trg_embedder,
        output_fn=output_fc)
    outputs, _ = nn.dynamic_decode(
        decoder,
        inits=dec_initial_states,
        max_step_num=self.max_out_len,
        latent_z=latent_z)
    return outputs
def decode(self, x_tree_vecs, prob_decode):
    """Decode the tree structure from the tree latent space.

    Args:
        x_tree_vecs(tensor): tree latent representation.
        prob_decode(bool): use Bernoulli sampling in tree decoding if
            prob_decode is True.

    Returns:
        root node and all nodes.
    """
    assert x_tree_vecs.shape[0] == 1

    stack = []
    init_hiddens = paddle.zeros([1, self.hidden_size])
    zero_pad = paddle.zeros([1, 1, self.hidden_size])
    contexts = paddle.zeros([1]).astype('int64')

    root_score = self.aggregate(init_hiddens, contexts, x_tree_vecs, 'word')
    root_wid = paddle.argmax(root_score, axis=1)
    root_wid = int(root_wid.numpy())

    root = MolTreeNode(self.vocab.get_smiles(root_wid))
    root.wid = root_wid
    root.idx = 0
    stack.append((root, self.vocab.get_slots(root.wid)))

    all_nodes = [root]
    h = {}
    for step in range(MAX_DECODE_LEN):
        node_x, fa_slot = stack[-1]
        cur_h_nei = [h[(node_y.idx, node_x.idx)]
                     for node_y in node_x.neighbors]
        if len(cur_h_nei) > 0:
            cur_h_nei = paddle.reshape(
                paddle.stack(cur_h_nei, axis=0),
                shape=[1, -1, self.hidden_size])
        else:
            cur_h_nei = zero_pad

        cur_x = paddle.to_tensor([node_x.wid])
        cur_x = self.embedding(cur_x)

        # Predict whether to stop expanding the current node.
        cur_h = paddle.sum(cur_h_nei, axis=1)
        stop_hiddens = paddle.concat([cur_x, cur_h], axis=1)
        stop_hiddens = F.relu(self.U_i(stop_hiddens))
        stop_score = self.aggregate(stop_hiddens, contexts, x_tree_vecs,
                                    'stop')

        if prob_decode:
            backtrack = (paddle.bernoulli(
                F.sigmoid(stop_score)).item() == 0)
        else:
            backtrack = (float(stop_score.numpy()) < 0)

        if not backtrack:
            new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r,
                        self.W_h)
            pred_score = self.aggregate(new_h, contexts, x_tree_vecs,
                                        'word')

            if prob_decode:
                sort_wid = paddle.multinomial(
                    F.softmax(pred_score, axis=1).squeeze(), 5)
            else:
                sort_wid = paddle.argsort(
                    pred_score, axis=1, descending=True)
                sort_wid = sort_wid.squeeze()

            next_wid = None
            for wid in sort_wid[:5]:
                slots = self.vocab.get_slots(wid)
                node_y = MolTreeNode(self.vocab.get_smiles(wid))
                if have_slots(fa_slot, slots) and can_assemble(node_x,
                                                               node_y):
                    next_wid = wid
                    next_slots = slots
                    break

            if next_wid is None:
                backtrack = True
            else:
                node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                node_y.wid = int(next_wid.numpy())
                node_y.idx = len(all_nodes)
                node_y.neighbors.append(node_x)
                h[(node_x.idx, node_y.idx)] = new_h[0]
                stack.append((node_y, next_slots))
                all_nodes.append(node_y)

        if backtrack:
            if len(stack) == 1:
                break

            node_fa, _ = stack[-2]
            cur_h_nei = [h[(node_y.idx, node_x.idx)]
                         for node_y in node_x.neighbors
                         if node_y.idx != node_fa.idx]
            if len(cur_h_nei) > 0:
                cur_h_nei = paddle.reshape(
                    paddle.stack(cur_h_nei, axis=0),
                    shape=[1, -1, self.hidden_size])
            else:
                cur_h_nei = zero_pad

            new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r,
                        self.W_h)
            h[(node_x.idx, node_fa.idx)] = new_h[0]
            node_fa.neighbors.append(node_x)
            stack.pop()

    return root, all_nodes
def sample(self, n_batch, max_len=100, z=None, temp=1.0):
    """Generate n_batch samples in eval mode (`z` may live on a
    different device).

    :param n_batch: number of sentences to generate
    :param max_len: max length of samples
    :param z: (n_batch, d_z) of floats, latent vector z or None
    :param temp: temperature of softmax
    :return: list of strings, the sampled sequences x
    """
    if z is None:
        z = self.sample_z_prior(n_batch)
    z_0 = z.unsqueeze(1)

    # Initial values
    h = self.decoder_lat(z)
    h = paddle.expand(h.unsqueeze(0),
                      shape=[self.decoder_rnn.num_layers,
                             h.unsqueeze(0).shape[1],
                             h.unsqueeze(0).shape[2]])
    w = paddle.expand(paddle.to_tensor(self.bos), shape=[n_batch])
    x = paddle.expand(paddle.to_tensor([self.pad]),
                      shape=[n_batch, max_len])
    x[:, 0] = self.bos
    end_pads = paddle.expand(paddle.to_tensor([max_len]), shape=[n_batch])
    eos_mask = paddle.zeros([n_batch], dtype='bool')

    # Generating cycle
    for i in range(1, max_len):
        x_emb = self.x_emb(w).unsqueeze(1)
        x_input = paddle.concat([x_emb, z_0], axis=-1)

        o, h = self.decoder_rnn(x_input, h)
        y = self.decoder_fc(o.squeeze(1))
        y = F.softmax(y / temp, axis=-1)

        w = paddle.multinomial(y, 1)[:, 0]
        # w = paddle.argmax(y, 1)

        # Convert to numpy in order to slice with the boolean mask.
        x = x.numpy()
        eos_mask = eos_mask.numpy()
        w = w.numpy()
        end_pads = end_pads.numpy()

        x[~eos_mask, i] = w[~eos_mask]
        i_eos_mask = ~eos_mask & (w == self.eos)
        end_pads[i_eos_mask] = i + 1
        eos_mask = eos_mask | i_eos_mask

        # Convert back to tensors.
        x = paddle.to_tensor(x)
        w = paddle.to_tensor(w)
        eos_mask = paddle.to_tensor(eos_mask)
        end_pads = paddle.to_tensor(end_pads)

    # Convert `x` to a list of tensors, trimmed at their end positions.
    new_x = []
    for i in range(x.shape[0]):
        new_x.append(x[i, :int(end_pads[i])])

    return [self.tensor2string(i_x) for i_x in new_x]
def dfs_assemble(self, y_tree_mess, x_mol_vecs, all_nodes, cur_mol,
                 global_amap, fa_amap, cur_node, fa_node, prob_decode,
                 check_aroma):
    """DFS in subgraph assembly."""
    fa_nid = fa_node.nid if fa_node is not None else -1
    prev_nodes = [fa_node] if fa_node is not None else []

    children = [nei for nei in cur_node.neighbors if nei.nid != fa_nid]
    neighbors = [nei for nei in children if nei.mol.GetNumAtoms() > 1]
    neighbors = sorted(neighbors, key=lambda x: x.mol.GetNumAtoms(),
                       reverse=True)
    singletons = [nei for nei in children if nei.mol.GetNumAtoms() == 1]
    neighbors = singletons + neighbors

    cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap
                if nid == cur_node.nid]
    cands, aroma_score = enum_assemble(cur_node, neighbors, prev_nodes,
                                       cur_amap)
    if len(cands) == 0 or (sum(aroma_score) < 0 and check_aroma):
        return None, cur_mol

    cand_smiles, cand_amap = zip(*cands)
    aroma_score = paddle.to_tensor(aroma_score)
    cands = [(smiles, all_nodes, cur_node) for smiles in cand_smiles]

    if len(cands) > 1:
        jtmpn_holder = JTMPN.tensorize(cands, y_tree_mess[1])
        fatoms = jtmpn_holder['fatoms']
        fbonds = jtmpn_holder['fbonds']
        agraph = jtmpn_holder['agraph']
        bgraph = jtmpn_holder['bgraph']
        scope = jtmpn_holder['scope']
        cand_vecs = self.jtmpn(fatoms, fbonds, agraph, bgraph, scope,
                               y_tree_mess[0])
        scores = paddle.mv(cand_vecs, x_mol_vecs) + aroma_score
    else:
        scores = paddle.to_tensor([1.0])

    if prob_decode:
        probs = paddle.squeeze(
            F.softmax(paddle.reshape(scores, shape=[1, -1]), axis=1)) + 1e-7
        cand_idx = paddle.multinomial(probs, probs.numel())
    else:
        cand_idx = paddle.argsort(scores, descending=True)

    backup_mol = Chem.RWMol(cur_mol)
    pre_mol = cur_mol
    for i in range(cand_idx.numel()):
        cur_mol = Chem.RWMol(backup_mol)
        pred_amap = cand_amap[int(cand_idx[i].numpy())]
        new_global_amap = copy.deepcopy(global_amap)

        for nei_id, ctr_atom, nei_atom in pred_amap:
            if nei_id == fa_nid:
                continue
            new_global_amap[nei_id][nei_atom] = \
                new_global_amap[cur_node.nid][ctr_atom]

        cur_mol = attach_mols(cur_mol, children, [], new_global_amap)
        new_mol = cur_mol.GetMol()
        new_mol = Chem.MolFromSmiles(Chem.MolToSmiles(new_mol))

        if new_mol is None:
            continue

        has_error = False
        for nei_node in children:
            if nei_node.is_leaf:
                continue
            tmp_mol, tmp_mol2 = self.dfs_assemble(
                y_tree_mess, x_mol_vecs, all_nodes, cur_mol,
                new_global_amap, pred_amap, nei_node, cur_node,
                prob_decode, check_aroma)
            if tmp_mol is None:
                has_error = True
                if i == 0:
                    pre_mol = tmp_mol2
                break
            cur_mol = tmp_mol

        if not has_error:
            return cur_mol, cur_mol

    return None, pre_mol
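Note the `prob_decode` branch above draws `probs.numel()` samples without replacement, i.e. a full permutation of the candidate indices biased toward high-scoring candidates. A toy sketch of that trick (values are made up):

import paddle
import paddle.nn.functional as F

paddle.seed(7)
scores = paddle.to_tensor([2.0, 0.5, 1.0, 0.1])
probs = F.softmax(scores) + 1e-7
# num_samples equal to the number of categories with the default
# replacement=False yields a weighted random visiting order.
order = paddle.multinomial(probs, probs.shape[0])
print(order.numpy())  # a permutation of [0, 1, 2, 3]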
def sample(self,
           input_ids,
           logits_processors,
           max_length,
           pad_token_id,
           eos_token_id,
           top_k=None,
           top_p=None,
           temperature=None,
           min_tokens_to_keep=1,
           **model_kwargs):
    def TopKProcess(probs, top_k, min_tokens_to_keep):
        top_k = min(max(top_k, min_tokens_to_keep), probs.shape[-1])
        # Remove all tokens with a probability less than the last token
        # of the top-k.
        topk_probs, _ = paddle.topk(probs, k=top_k)
        probs = paddle.where(probs >= topk_probs[:, -1:], probs,
                             paddle.full_like(probs, 0.0))
        return probs

    def TopPProcess(probs, top_p, min_tokens_to_keep):
        sorted_probs = paddle.sort(probs, descending=True)
        sorted_indices = paddle.argsort(probs, descending=True)
        cumulative_probs = paddle.cumsum(sorted_probs, axis=-1)

        # Remove tokens whose cumulative probability is above top_p,
        # but keep at least min_tokens_to_keep tokens.
        sorted_indices_to_remove = cumulative_probs > top_p
        if min_tokens_to_keep > 1:
            # Set 'min_tokens_to_keep - 1' because the first token is kept.
            sorted_indices_to_remove[:, :min_tokens_to_keep - 1] = 0
        # Shift the mask right so the first token crossing the
        # threshold is kept as well.
        sorted_indices_to_remove = paddle.cast(sorted_indices_to_remove,
                                               dtype='int64')
        sorted_indices_to_remove[:, 1:] = (
            sorted_indices_to_remove[:, :-1].clone())
        sorted_indices_to_remove[:, 0] = 0

        # Scatter the sorted mask back to the original indexing.
        sorted_indices = sorted_indices + paddle.arange(
            probs.shape[0]).unsqueeze(-1) * probs.shape[-1]
        condition = paddle.scatter(sorted_indices_to_remove.flatten(),
                                   sorted_indices.flatten(),
                                   sorted_indices_to_remove.flatten())
        condition = paddle.cast(condition, 'bool').reshape(probs.shape)
        probs = paddle.where(condition, paddle.full_like(probs, 0.0),
                             probs)
        return probs

    batch_size, cur_len = input_ids.shape
    origin_len = cur_len
    unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
    scores = paddle.full([batch_size, 1], 0.0,
                         dtype=paddle.get_default_dtype())

    while cur_len < max_length:
        # Prepare model inputs and get the model output.
        model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                          **model_kwargs)
        outputs = self(**model_inputs)
        logits = outputs[0] if isinstance(outputs, tuple) else outputs
        # [batch_size, vocab_size]
        logits = logits[:, -1, :]

        # Pre-process the distribution.
        logits = self.adjust_logits_during_generation(logits)
        logits = logits_processors(input_ids, logits)

        # Sample the next token.
        origin_probs = F.softmax(logits)
        origin_probs = paddle.log(origin_probs)
        if temperature is not None and temperature != 1.0:
            logits = logits / temperature
        probs = F.softmax(logits)
        if top_k is not None and top_k != 0:
            probs = TopKProcess(probs, top_k, min_tokens_to_keep)
        if top_p is not None and top_p < 1.0:
            probs = TopPProcess(probs, top_p, min_tokens_to_keep)
        next_tokens = paddle.multinomial(probs)
        next_scores = paddle.index_sample(origin_probs, next_tokens)

        if eos_token_id is not None:
            next_tokens = paddle.where(
                unfinished_flag, next_tokens,
                paddle.full_like(next_tokens, pad_token_id))

        scores = self.update_scores_for_generation(
            scores, next_scores, cur_len - origin_len, unfinished_flag)

        cur_len += 1
        input_ids = paddle.concat([input_ids, next_tokens], axis=1)

        if eos_token_id is not None:
            unfinished_flag = paddle.logical_and(
                unfinished_flag, next_tokens != eos_token_id)

        # Stop when there is a </s> in every sentence.
        if not paddle.any(unfinished_flag):
            break

        model_kwargs = self.update_model_kwargs_for_generation(
            outputs, model_kwargs)

    return input_ids[:, origin_len:], scores
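Hand-running `TopPProcess` on a toy row (treating it as standalone for illustration) makes the shift-right keep rule concrete:

import paddle

# probs (already sorted): [0.50, 0.30, 0.10, 0.10]
# cumulative probs:       [0.50, 0.80, 0.90, 1.00]
# mask (cum > 0.8):       [F, F, T, T] -> shifted right: [F, F, F, T]
# so with top_p = 0.8 only the last token is zeroed out:
filtered = paddle.to_tensor([[0.5, 0.3, 0.1, 0.0]])
# paddle.multinomial treats each row as unnormalized weights, so the
# filtered row can be sampled from directly.
next_token = paddle.multinomial(filtered)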
def test_dim_less_than_1():
    x_np = np.random.random([])
    x = paddle.to_tensor(x_np)
    out = paddle.multinomial(x)
def test_dim_larger_than_2():
    x = paddle.rand([2, 3, 3])
    out = paddle.multinomial(x)
def test_num_sample_less_than_0():
    x = paddle.rand([4])
    out = paddle.multinomial(x, num_samples=-2)
def test_alias(self):
    paddle.disable_static()
    x = paddle.rand([4])
    paddle.multinomial(x, num_samples=10, replacement=True)
    paddle.tensor.multinomial(x, num_samples=10, replacement=True)
    paddle.tensor.random.multinomial(x, num_samples=10, replacement=True)
def test_samples_larger_than_categories():
    x = paddle.rand([4])
    paddle.multinomial(x, num_samples=5, replacement=False)